From: Tim Northover
Date: Wed, 9 Mar 2016 20:06:10 +0000 (+0000)
Subject: ARM & AArch64: fix IR-converted tests.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=14efd02e2628852717b247f227ff5116af4808e0;p=clang

ARM & AArch64: fix IR-converted tests.

My script was converting %a0 to [[A]]0 if it had seen %a defined before
%a0. Oops.
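For illustration, the failure mode is the classic prefix-substitution bug,
sketched below in Python. This is a hypothetical reconstruction, not the
actual conversion script; the function and variable names are invented.

import re

# Hypothetical sketch of the bug: previously-seen IR names are rewritten
# to FileCheck captures with plain string replacement, so "%a" also
# matches the front of "%a0" and produces "[[A]]0".
def convert_naive(line, captures):
    # captures maps IR names to FileCheck variables, e.g. {"a": "A"}
    for ir_name, fc_name in captures.items():
        line = line.replace("%" + ir_name, "[[" + fc_name + "]]")  # BUG
    return line

def convert_fixed(line, captures):
    for ir_name, fc_name in captures.items():
        # Require a word boundary after the name so "%a" cannot eat "%a0".
        line = re.sub(r"%" + re.escape(ir_name) + r"\b",
                      "[[" + fc_name + "]]", line)
    return line

captures = {"a": "A"}  # "%a" was defined before "%a0" appeared
print(convert_naive("ret i32 %a0", captures))  # ret i32 [[A]]0  (wrong)
print(convert_fixed("ret i32 %a0", captures))  # ret i32 %a0     (left alone)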
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@263056 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/aarch64-neon-2velem.c b/test/CodeGen/aarch64-neon-2velem.c
index 56ce89d673..03c9a858e5 100644
--- a/test/CodeGen/aarch64-neon-2velem.c
+++ b/test/CodeGen/aarch64-neon-2velem.c
@@ -284,8 +284,8 @@ uint32x4_t test_vmulq_laneq_u32(uint32x4_t a, uint32x4_t v) {
 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32>
 // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
 // CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA]]1)
-// CHECK: ret <2 x float> [[FMLA]]2
+// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
+// CHECK: ret <2 x float> [[FMLA2]]
 float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
   return vfma_lane_f32(a, b, v, 1);
 }
@@ -298,8 +298,8 @@ float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32>
 // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
 // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA]]1)
-// CHECK: ret <4 x float> [[FMLA]]2
+// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
+// CHECK: ret <4 x float> [[FMLA2]]
 float32x4_t test_vfmaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
   return vfmaq_lane_f32(a, b, v, 1);
 }
@@ -341,8 +341,8 @@ float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32>
 // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
 // CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA]]1)
-// CHECK: ret <2 x float> [[FMLA]]2
+// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
+// CHECK: ret <2 x float> [[FMLA2]]
 float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
   return vfms_lane_f32(a, b, v, 1);
 }
@@ -356,8 +356,8 @@ float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32>
 // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
 // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA]]1)
-// CHECK: ret <4 x float> [[FMLA]]2
+// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
+// CHECK: ret <4 x float> [[FMLA2]]
 float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
   return vfmsq_lane_f32(a, b, v, 1);
 }
@@ -400,8 +400,8 @@ float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
 // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer
 // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
 // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA]]1)
-// CHECK: ret <2 x double> [[FMLA]]2
+// CHECK: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]])
+// CHECK: ret <2 x double> [[FMLA2]]
 float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) {
   return vfmaq_lane_f64(a, b, v, 0);
 }
@@ -429,8 +429,8 @@ float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) {
 // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer
 // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double>
 // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA]]1)
-// CHECK: ret <2 x double> [[FMLA]]2
+// CHECK: [[FMLA2:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLA]], <2 x double> [[LANE]], <2 x double> [[FMLA1]])
+// CHECK: ret <2 x double> [[FMLA2]]
 float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) {
   return vfmsq_lane_f64(a, b, v, 0);
 }
@@ -1906,8 +1906,8 @@ uint32x4_t test_vmulq_laneq_u32_0(uint32x4_t a, uint32x4_t v) {
 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
 // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
 // CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA]]1)
-// CHECK: ret <2 x float> [[FMLA]]2
+// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
+// CHECK: ret <2 x float> [[FMLA2]]
 float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
   return vfma_lane_f32(a, b, v, 0);
 }
@@ -1920,8 +1920,8 @@ float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer
 // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
 // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA]]1)
-// CHECK: ret <4 x float> [[FMLA]]2
+// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
+// CHECK: ret <4 x float> [[FMLA2]]
 float32x4_t test_vfmaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
   return vfmaq_lane_f32(a, b, v, 0);
 }
@@ -1963,8 +1963,8 @@ float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v)
 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
 // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
 // CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA]]1)
-// CHECK: ret <2 x float> [[FMLA]]2
+// CHECK: [[FMLA2:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLA]], <2 x float> [[LANE]], <2 x float> [[FMLA1]])
+// CHECK: ret <2 x float> [[FMLA2]]
 float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
   return vfms_lane_f32(a, b, v, 0);
 }
@@ -1978,8 +1978,8 @@ float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
 // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer
 // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
 // CHECK: [[FMLA1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA]]1)
-// CHECK: ret <4 x float> [[FMLA]]2
+// CHECK: [[FMLA2:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLA]], <4 x float> [[LANE]], <4 x float> [[FMLA1]])
+// CHECK: ret <4 x float> [[FMLA2]]
 float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
   return vfmsq_lane_f32(a, b, v, 0);
 }
diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c
index cb3a2c318f..ae0391a8c6 100644
--- a/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/test/CodeGen/aarch64-neon-intrinsics.c
@@ -4200,8 +4200,8 @@ uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N]]1, i32 0)
-// CHECK: ret <1 x i64> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0)
+// CHECK: ret <1 x i64> [[VSLI_N2]]
 poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
   return vsli_n_p64(a, b, 0);
 }
@@ -4211,8 +4211,8 @@ poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N]]1, i32 0)
-// CHECK: ret <2 x i64> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0)
+// CHECK: ret <2 x i64> [[VSLI_N2]]
 poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) {
   return vsliq_n_p64(a, b, 0);
 }
@@ -5653,7 +5653,7 @@ int8x8_t test_vrshr_n_s8(int8x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> )
-// CHECK: ret <4 x i16> [[VRSHR_N]]1
+// CHECK: ret <4 x i16> [[VRSHR_N1]]
 int16x4_t test_vrshr_n_s16(int16x4_t a) {
   return vrshr_n_s16(a, 3);
 }
@@ -5662,7 +5662,7 @@ int16x4_t test_vrshr_n_s16(int16x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> )
-// CHECK: ret <2 x i32> [[VRSHR_N]]1
+// CHECK: ret <2 x i32> [[VRSHR_N1]]
 int32x2_t test_vrshr_n_s32(int32x2_t a) {
   return vrshr_n_s32(a, 3);
 }
@@ -5678,7 +5678,7 @@ int8x16_t test_vrshrq_n_s8(int8x16_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> )
-// CHECK: ret <8 x i16> [[VRSHR_N]]1
+// CHECK: ret <8 x i16> [[VRSHR_N1]]
 int16x8_t test_vrshrq_n_s16(int16x8_t a) {
   return vrshrq_n_s16(a, 3);
 }
@@ -5687,7 +5687,7 @@ int16x8_t test_vrshrq_n_s16(int16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> )
-// CHECK: ret <4 x i32> [[VRSHR_N]]1
+// CHECK: ret <4 x i32> [[VRSHR_N1]]
 int32x4_t test_vrshrq_n_s32(int32x4_t a) {
   return vrshrq_n_s32(a, 3);
 }
@@ -5696,7 +5696,7 @@ int32x4_t test_vrshrq_n_s32(int32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> )
-// CHECK: ret <2 x i64> [[VRSHR_N]]1
+// CHECK: ret <2 x i64> [[VRSHR_N1]]
 int64x2_t test_vrshrq_n_s64(int64x2_t a) {
   return vrshrq_n_s64(a, 3);
 }
@@ -5712,7 +5712,7 @@ int8x8_t test_vrshr_n_u8(int8x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> )
-// CHECK: ret <4 x i16> [[VRSHR_N]]1
+// CHECK: ret <4 x i16> [[VRSHR_N1]]
 int16x4_t test_vrshr_n_u16(int16x4_t a) {
   return vrshr_n_u16(a, 3);
 }
@@ -5721,7 +5721,7 @@ int16x4_t test_vrshr_n_u16(int16x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> )
-// CHECK: ret <2 x i32> [[VRSHR_N]]1
+// CHECK: ret <2 x i32> [[VRSHR_N1]]
 int32x2_t test_vrshr_n_u32(int32x2_t a) {
   return vrshr_n_u32(a, 3);
 }
@@ -5737,7 +5737,7 @@ int8x16_t test_vrshrq_n_u8(int8x16_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> )
-// CHECK: ret <8 x i16> [[VRSHR_N]]1
+// CHECK: ret <8 x i16> [[VRSHR_N1]]
 int16x8_t test_vrshrq_n_u16(int16x8_t a) {
   return vrshrq_n_u16(a, 3);
 }
@@ -5746,7 +5746,7 @@ int16x8_t test_vrshrq_n_u16(int16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> )
-// CHECK: ret <4 x i32> [[VRSHR_N]]1
+// CHECK: ret <4 x i32> [[VRSHR_N1]]
 int32x4_t test_vrshrq_n_u32(int32x4_t a) {
   return vrshrq_n_u32(a, 3);
 }
@@ -5755,7 +5755,7 @@ int32x4_t test_vrshrq_n_u32(int32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> )
-// CHECK: ret <2 x i64> [[VRSHR_N]]1
+// CHECK: ret <2 x i64> [[VRSHR_N1]]
 int64x2_t test_vrshrq_n_u64(int64x2_t a) {
   return vrshrq_n_u64(a, 3);
 }
@@ -5774,7 +5774,7 @@ int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> )
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N]]1
+// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
 // CHECK: ret <4 x i16> [[TMP3]]
 int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
   return vrsra_n_s16(a, b, 3);
@@ -5786,7 +5786,7 @@ int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> )
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N]]1
+// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
 // CHECK: ret <2 x i32> [[TMP3]]
 int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
   return vrsra_n_s32(a, b, 3);
@@ -5806,7 +5806,7 @@ int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> )
 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N]]1
+// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
 // CHECK: ret <8 x i16> [[TMP3]]
 int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
   return vrsraq_n_s16(a, b, 3);
@@ -5818,7 +5818,7 @@ int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> )
 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N]]1
+// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
 // CHECK: ret <4 x i32> [[TMP3]]
 int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
   return vrsraq_n_s32(a, b, 3);
@@ -5830,7 +5830,7 @@ int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> )
 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N]]1
+// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
 // CHECK: ret <2 x i64> [[TMP3]]
 int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
   return vrsraq_n_s64(a, b, 3);
@@ -5850,7 +5850,7 @@ int8x8_t test_vrsra_n_u8(int8x8_t a, int8x8_t b) {
 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> )
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N]]1
+// CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]]
 // CHECK: ret <4 x i16> [[TMP3]]
 int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) {
   return vrsra_n_u16(a, b, 3);
@@ -5862,7 +5862,7 @@ int16x4_t test_vrsra_n_u16(int16x4_t a, int16x4_t b) {
 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> )
 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N]]1
+// CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]]
 // CHECK: ret <2 x i32> [[TMP3]]
 int32x2_t test_vrsra_n_u32(int32x2_t a, int32x2_t b) {
   return vrsra_n_u32(a, b, 3);
@@ -5882,7 +5882,7 @@ int8x16_t test_vrsraq_n_u8(int8x16_t a, int8x16_t b) {
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> )
 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N]]1
+// CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]]
 // CHECK: ret <8 x i16> [[TMP3]]
 int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) {
   return vrsraq_n_u16(a, b, 3);
@@ -5894,7 +5894,7 @@ int16x8_t test_vrsraq_n_u16(int16x8_t a, int16x8_t b) {
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> )
 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N]]1
+// CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]]
 // CHECK: ret <4 x i32> [[TMP3]]
 int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) {
   return vrsraq_n_u32(a, b, 3);
@@ -5906,7 +5906,7 @@ int32x4_t test_vrsraq_n_u32(int32x4_t a, int32x4_t b) {
 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> )
 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N]]1
+// CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]]
 // CHECK: ret <2 x i64> [[TMP3]]
 int64x2_t test_vrsraq_n_u64(int64x2_t a, int64x2_t b) {
   return vrsraq_n_u64(a, b, 3);
@@ -5924,8 +5924,8 @@ int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N]]1, i32 3)
-// CHECK: ret <4 x i16> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
+// CHECK: ret <4 x i16> [[VSRI_N2]]
 int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
   return vsri_n_s16(a, b, 3);
 }
@@ -5935,8 +5935,8 @@ int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N]]1, i32 3)
-// CHECK: ret <2 x i32> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
+// CHECK: ret <2 x i32> [[VSRI_N2]]
 int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
   return vsri_n_s32(a, b, 3);
 }
@@ -5953,8 +5953,8 @@ int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N]]1, i32 3)
-// CHECK: ret <8 x i16> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
+// CHECK: ret <8 x i16> [[VSRI_N2]]
 int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
   return vsriq_n_s16(a, b, 3);
 }
@@ -5964,8 +5964,8 @@ int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N]]1, i32 3)
-// CHECK: ret <4 x i32> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
+// CHECK: ret <4 x i32> [[VSRI_N2]]
 int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
   return vsriq_n_s32(a, b, 3);
 }
@@ -5975,8 +5975,8 @@ int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N]]1, i32 3)
-// CHECK: ret <2 x i64> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
+// CHECK: ret <2 x i64> [[VSRI_N2]]
 int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
   return vsriq_n_s64(a, b, 3);
 }
@@ -5993,8 +5993,8 @@ int8x8_t test_vsri_n_u8(int8x8_t a, int8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N]]1, i32 3)
-// CHECK: ret <4 x i16> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3)
+// CHECK: ret <4 x i16> [[VSRI_N2]]
 int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) {
   return vsri_n_u16(a, b, 3);
 }
@@ -6004,8 +6004,8 @@ int16x4_t test_vsri_n_u16(int16x4_t a, int16x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N]]1, i32 3)
-// CHECK: ret <2 x i32> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3)
+// CHECK: ret <2 x i32> [[VSRI_N2]]
 int32x2_t test_vsri_n_u32(int32x2_t a, int32x2_t b) {
   return vsri_n_u32(a, b, 3);
 }
@@ -6022,8 +6022,8 @@ int8x16_t test_vsriq_n_u8(int8x16_t a, int8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N]]1, i32 3)
-// CHECK: ret <8 x i16> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3)
+// CHECK: ret <8 x i16> [[VSRI_N2]]
 int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) {
   return vsriq_n_u16(a, b, 3);
 }
@@ -6033,8 +6033,8 @@ int16x8_t test_vsriq_n_u16(int16x8_t a, int16x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N]]1, i32 3)
-// CHECK: ret <4 x i32> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3)
+// CHECK: ret <4 x i32> [[VSRI_N2]]
 int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) {
   return vsriq_n_u32(a, b, 3);
 }
@@ -6044,8 +6044,8 @@ int32x4_t test_vsriq_n_u32(int32x4_t a, int32x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N]]1, i32 3)
-// CHECK: ret <2 x i64> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3)
+// CHECK: ret <2 x i64> [[VSRI_N2]]
 int64x2_t test_vsriq_n_u64(int64x2_t a, int64x2_t b) {
   return vsriq_n_u64(a, b, 3);
 }
@@ -6062,8 +6062,8 @@ poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N]]1, i32 15)
-// CHECK: ret <4 x i16> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15)
+// CHECK: ret <4 x i16> [[VSRI_N2]]
 poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
   return vsri_n_p16(a, b, 15);
 }
@@ -6080,8 +6080,8 @@ poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N]]1, i32 15)
-// CHECK: ret <8 x i16> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15)
+// CHECK: ret <8 x i16> [[VSRI_N2]]
 poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
   return vsriq_n_p16(a, b, 15);
 }
@@ -6098,8 +6098,8 @@ int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N]]1, i32 3)
-// CHECK: ret <4 x i16> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
+// CHECK: ret <4 x i16> [[VSLI_N2]]
 int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
   return vsli_n_s16(a, b, 3);
 }
@@ -6109,8 +6109,8 @@ int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N]]1, i32 3)
-// CHECK: ret <2 x i32> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
+// CHECK: ret <2 x i32> [[VSLI_N2]]
 int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
   return vsli_n_s32(a, b, 3);
 }
@@ -6127,8 +6127,8 @@ int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N]]1, i32 3)
-// CHECK: ret <8 x i16> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
+// CHECK: ret <8 x i16> [[VSLI_N2]]
 int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
   return vsliq_n_s16(a, b, 3);
 }
@@ -6138,8 +6138,8 @@ int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N]]1, i32 3)
-// CHECK: ret <4 x i32> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
+// CHECK: ret <4 x i32> [[VSLI_N2]]
 int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
   return vsliq_n_s32(a, b, 3);
 }
@@ -6149,8 +6149,8 @@ int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N]]1, i32 3)
-// CHECK: ret <2 x i64> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
+// CHECK: ret <2 x i64> [[VSLI_N2]]
 int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
   return vsliq_n_s64(a, b, 3);
 }
@@ -6167,8 +6167,8 @@ uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N]]1, i32 3)
-// CHECK: ret <4 x i16> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3)
+// CHECK: ret <4 x i16> [[VSLI_N2]]
 uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
   return vsli_n_u16(a, b, 3);
 }
@@ -6178,8 +6178,8 @@ uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N]]1, i32 3)
-// CHECK: ret <2 x i32> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3)
+// CHECK: ret <2 x i32> [[VSLI_N2]]
 uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
   return vsli_n_u32(a, b, 3);
 }
@@ -6196,8 +6196,8 @@ uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N]]1, i32 3)
-// CHECK: ret <8 x i16> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3)
+// CHECK: ret <8 x i16> [[VSLI_N2]]
 uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
   return vsliq_n_u16(a, b, 3);
 }
@@ -6207,8 +6207,8 @@ uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N]]1, i32 3)
-// CHECK: ret <4 x i32> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3)
+// CHECK: ret <4 x i32> [[VSLI_N2]]
 uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
   return vsliq_n_u32(a, b, 3);
 }
@@ -6218,8 +6218,8 @@ uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N]]1, i32 3)
-// CHECK: ret <2 x i64> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3)
+// CHECK: ret <2 x i64> [[VSLI_N2]]
 uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
   return vsliq_n_u64(a, b, 3);
 }
@@ -6236,8 +6236,8 @@ poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N]]1, i32 15)
-// CHECK: ret <4 x i16> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15)
+// CHECK: ret <4 x i16> [[VSLI_N2]]
 poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
   return vsli_n_p16(a, b, 15);
 }
@@ -6254,8 +6254,8 @@ poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N]]1, i32 15)
-// CHECK: ret <8 x i16> [[VSLI_N]]2
+// CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15)
+// CHECK: ret <8 x i16> [[VSLI_N2]]
 poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
   return vsliq_n_p16(a, b, 15);
 }
@@ -6271,7 +6271,7 @@ int8x8_t test_vqshlu_n_s8(int8x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> )
-// CHECK: ret <4 x i16> [[VQSHLU_N]]1
+// CHECK: ret <4 x i16> [[VQSHLU_N1]]
 int16x4_t test_vqshlu_n_s16(int16x4_t a) {
   return vqshlu_n_s16(a, 3);
 }
@@ -6280,7 +6280,7 @@ int16x4_t test_vqshlu_n_s16(int16x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> )
-// CHECK: ret <2 x i32> [[VQSHLU_N]]1
+// CHECK: ret <2 x i32> [[VQSHLU_N1]]
 int32x2_t test_vqshlu_n_s32(int32x2_t a) {
   return vqshlu_n_s32(a, 3);
 }
@@ -6296,7 +6296,7 @@ int8x16_t test_vqshluq_n_s8(int8x16_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> )
-// CHECK: ret <8 x i16> [[VQSHLU_N]]1
+// CHECK: ret <8 x i16> [[VQSHLU_N1]]
 int16x8_t test_vqshluq_n_s16(int16x8_t a) {
   return vqshluq_n_s16(a, 3);
 }
@@ -6305,7 +6305,7 @@ int16x8_t test_vqshluq_n_s16(int16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> )
-// CHECK: ret <4 x i32> [[VQSHLU_N]]1
+// CHECK: ret <4 x i32> [[VQSHLU_N1]]
 int32x4_t test_vqshluq_n_s32(int32x4_t a) {
   return vqshluq_n_s32(a, 3);
 }
@@ -6314,7 +6314,7 @@ int32x4_t test_vqshluq_n_s32(int32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> )
-// CHECK: ret <2 x i64> [[VQSHLU_N]]1
+// CHECK: ret <2 x i64> [[VQSHLU_N1]]
 int64x2_t test_vqshluq_n_s64(int64x2_t a) {
   return vqshluq_n_s64(a, 3);
 }
@@ -6449,7 +6449,7 @@ uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQSHRUN_N]]1
+// CHECK: ret <8 x i8> [[VQSHRUN_N1]]
 int8x8_t test_vqshrun_n_s16(int16x8_t a) {
   return vqshrun_n_s16(a, 3);
 }
@@ -6458,7 +6458,7 @@ int8x8_t test_vqshrun_n_s16(int16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQSHRUN_N]]1
+// CHECK: ret <4 x i16> [[VQSHRUN_N1]]
 int16x4_t test_vqshrun_n_s32(int32x4_t a) {
   return vqshrun_n_s32(a, 9);
 }
@@ -6467,7 +6467,7 @@ int16x4_t test_vqshrun_n_s32(int32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQSHRUN_N]]1
+// CHECK: ret <2 x i32> [[VQSHRUN_N1]]
 int32x2_t test_vqshrun_n_s64(int64x2_t a) {
   return vqshrun_n_s64(a, 19);
 }
@@ -6476,7 +6476,7 @@ int32x2_t test_vqshrun_n_s64(int64x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N]]1, <16 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
   return vqshrun_high_n_s16(a, b, 3);
@@ -6486,7 +6486,7 @@ int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N]]1, <8 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
   return vqshrun_high_n_s32(a, b, 9);
@@ -6496,7 +6496,7 @@ int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N]]1, <4 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
   return vqshrun_high_n_s64(a, b, 19);
@@ -6506,7 +6506,7 @@ int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VRSHRN_N]]1
+// CHECK: ret <8 x i8> [[VRSHRN_N1]]
 int8x8_t test_vrshrn_n_s16(int16x8_t a) {
   return vrshrn_n_s16(a, 3);
 }
@@ -6515,7 +6515,7 @@ int8x8_t test_vrshrn_n_s16(int16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VRSHRN_N]]1
+// CHECK: ret <4 x i16> [[VRSHRN_N1]]
 int16x4_t test_vrshrn_n_s32(int32x4_t a) {
   return vrshrn_n_s32(a, 9);
 }
@@ -6524,7 +6524,7 @@ int16x4_t test_vrshrn_n_s32(int32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VRSHRN_N]]1
+// CHECK: ret <2 x i32> [[VRSHRN_N1]]
 int32x2_t test_vrshrn_n_s64(int64x2_t a) {
   return vrshrn_n_s64(a, 19);
 }
@@ -6533,7 +6533,7 @@ int32x2_t test_vrshrn_n_s64(int64x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VRSHRN_N]]1
+// CHECK: ret <8 x i8> [[VRSHRN_N1]]
 uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
   return vrshrn_n_u16(a, 3);
 }
@@ -6542,7 +6542,7 @@ uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VRSHRN_N]]1
+// CHECK: ret <4 x i16> [[VRSHRN_N1]]
 uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
   return vrshrn_n_u32(a, 9);
 }
@@ -6551,7 +6551,7 @@ uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VRSHRN_N]]1
+// CHECK: ret <2 x i32> [[VRSHRN_N1]]
 uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
   return vrshrn_n_u64(a, 19);
 }
@@ -6560,7 +6560,7 @@ uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N]]1, <16 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
   return vrshrn_high_n_s16(a, b, 3);
@@ -6570,7 +6570,7 @@ int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N]]1, <8 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
   return vrshrn_high_n_s32(a, b, 9);
@@ -6580,7 +6580,7 @@ int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N]]1, <4 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
   return vrshrn_high_n_s64(a, b, 19);
@@ -6590,7 +6590,7 @@ int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N]]1, <16 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
   return vrshrn_high_n_u16(a, b, 3);
@@ -6600,7 +6600,7 @@ uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N]]1, <8 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
   return vrshrn_high_n_u32(a, b, 9);
@@ -6610,7 +6610,7 @@ uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N]]1, <4 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
   return vrshrn_high_n_u64(a, b, 19);
@@ -6620,7 +6620,7 @@ uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQRSHRUN_N]]1
+// CHECK: ret <8 x i8> [[VQRSHRUN_N1]]
 int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
   return vqrshrun_n_s16(a, 3);
 }
@@ -6629,7 +6629,7 @@ int8x8_t test_vqrshrun_n_s16(int16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQRSHRUN_N]]1
+// CHECK: ret <4 x i16> [[VQRSHRUN_N1]]
 int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
   return vqrshrun_n_s32(a, 9);
 }
@@ -6638,7 +6638,7 @@ int16x4_t test_vqrshrun_n_s32(int32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQRSHRUN_N]]1
+// CHECK: ret <2 x i32> [[VQRSHRUN_N1]]
 int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
   return vqrshrun_n_s64(a, 19);
 }
@@ -6647,7 +6647,7 @@ int32x2_t test_vqrshrun_n_s64(int64x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N]]1, <16 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
   return vqrshrun_high_n_s16(a, b, 3);
@@ -6657,7 +6657,7 @@ int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N]]1, <8 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
   return vqrshrun_high_n_s32(a, b, 9);
@@ -6667,7 +6667,7 @@ int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N]]1, <4 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
   return vqrshrun_high_n_s64(a, b, 19);
@@ -6677,7 +6677,7 @@ int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQSHRN_N]]1
+// CHECK: ret <8 x i8> [[VQSHRN_N1]]
 int8x8_t test_vqshrn_n_s16(int16x8_t a) {
   return vqshrn_n_s16(a, 3);
 }
@@ -6686,7 +6686,7 @@ int8x8_t test_vqshrn_n_s16(int16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQSHRN_N]]1
+// CHECK: ret <4 x i16> [[VQSHRN_N1]]
 int16x4_t test_vqshrn_n_s32(int32x4_t a) {
   return vqshrn_n_s32(a, 9);
 }
@@ -6695,7 +6695,7 @@ int16x4_t test_vqshrn_n_s32(int32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQSHRN_N]]1
+// CHECK: ret <2 x i32> [[VQSHRN_N1]]
 int32x2_t test_vqshrn_n_s64(int64x2_t a) {
   return vqshrn_n_s64(a, 19);
 }
@@ -6704,7 +6704,7 @@ int32x2_t test_vqshrn_n_s64(int64x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQSHRN_N]]1
+// CHECK: ret <8 x i8> [[VQSHRN_N1]]
 uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
   return vqshrn_n_u16(a, 3);
 }
@@ -6713,7 +6713,7 @@ uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQSHRN_N]]1
+// CHECK: ret <4 x i16> [[VQSHRN_N1]]
 uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
   return vqshrn_n_u32(a, 9);
 }
@@ -6722,7 +6722,7 @@ uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQSHRN_N]]1
+// CHECK: ret <2 x i32> [[VQSHRN_N1]]
 uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
   return vqshrn_n_u64(a, 19);
 }
@@ -6731,7 +6731,7 @@ uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N]]1, <16 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
   return vqshrn_high_n_s16(a, b, 3);
@@ -6741,7 +6741,7 @@ int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N]]1, <8 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
   return vqshrn_high_n_s32(a, b, 9);
@@ -6751,7 +6751,7 @@ int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N]]1, <4 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
   return vqshrn_high_n_s64(a, b, 19);
@@ -6761,7 +6761,7 @@ int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N]]1, <16 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
   return vqshrn_high_n_u16(a, b, 3);
@@ -6771,7 +6771,7 @@ uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N]]1, <8 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
   return vqshrn_high_n_u32(a, b, 9);
@@ -6781,7 +6781,7 @@ uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N]]1, <4 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
   return vqshrn_high_n_u64(a, b, 19);
@@ -6791,7 +6791,7 @@ uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQRSHRN_N]]1
+// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
 int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
   return vqrshrn_n_s16(a, 3);
 }
@@ -6800,7 +6800,7 @@ int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQRSHRN_N]]1
+// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
 int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
   return vqrshrn_n_s32(a, 9);
 }
@@ -6809,7 +6809,7 @@ int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQRSHRN_N]]1
+// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
 int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
   return vqrshrn_n_s64(a, 19);
 }
@@ -6818,7 +6818,7 @@ int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK: ret <8 x i8> [[VQRSHRN_N]]1
+// CHECK: ret <8 x i8> [[VQRSHRN_N1]]
 uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
   return vqrshrn_n_u16(a, 3);
 }
@@ -6827,7 +6827,7 @@ uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK: ret <4 x i16> [[VQRSHRN_N]]1
+// CHECK: ret <4 x i16> [[VQRSHRN_N1]]
 uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
   return vqrshrn_n_u32(a, 9);
 }
@@ -6836,7 +6836,7 @@ uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK: ret <2 x i32> [[VQRSHRN_N]]1
+// CHECK: ret <2 x i32> [[VQRSHRN_N1]]
 uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
   return vqrshrn_n_u64(a, 19);
 }
@@ -6845,7 +6845,7 @@ uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N]]1, <16 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
   return vqrshrn_high_n_s16(a, b, 3);
@@ -6855,7 +6855,7 @@ int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N]]1, <8 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
   return vqrshrn_high_n_s32(a, b, 9);
@@ -6865,7 +6865,7 @@ int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N]]1, <4 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
   return vqrshrn_high_n_s64(a, b, 19);
@@ -6875,7 +6875,7 @@ int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N]]1, <16 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32>
 // CHECK: ret <16 x i8> [[SHUFFLE_I]]
 uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
   return vqrshrn_high_n_u16(a, b, 3);
@@ -6885,7 +6885,7 @@ uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N]]1, <8 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32>
 // CHECK: ret <8 x i16> [[SHUFFLE_I]]
 uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
   return vqrshrn_high_n_u32(a, b, 9);
@@ -6895,7 +6895,7 @@ uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19)
-// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N]]1, <4 x i32>
+// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32>
 // CHECK: ret <4 x i32> [[SHUFFLE_I]]
 uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) {
   return vqrshrn_high_n_u64(a, b, 19);
@@ -7129,7 +7129,7 @@ uint64x2_t test_vmovl_high_u32(uint32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
-// CHECK: ret <2 x float> [[VCVT_N]]1
+// CHECK: ret <2 x float> [[VCVT_N1]]
 float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
   return vcvt_n_f32_s32(a, 31);
 }
@@ -7138,7 +7138,7 @@ float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31)
-// CHECK: ret <4 x float> [[VCVT_N]]1
+// CHECK: ret <4 x float> [[VCVT_N1]]
 float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
   return vcvtq_n_f32_s32(a, 31);
 }
@@ -7147,7 +7147,7 @@ float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
 // CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50)
-// CHECK: ret <2 x double> [[VCVT_N]]1
+// CHECK: ret <2 x double> [[VCVT_N1]]
 float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
   return vcvtq_n_f64_s64(a, 50);
 }
@@ -7156,7 +7156,7 @@ float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31)
-// CHECK: ret <2 x float> [[VCVT_N]]1
+// CHECK: ret <2 x float> [[VCVT_N1]]
 float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
   return vcvt_n_f32_u32(a, 31);
 }
@@ -7165,7 +7165,7 @@ float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
 // CHECK: [[VCVT_N1:%.*]] = call <4 x float>
@llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31) -// CHECK: ret <4 x float> [[VCVT_N]]1 +// CHECK: ret <4 x float> [[VCVT_N1]] float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { return vcvtq_n_f32_u32(a, 31); } @@ -7174,7 +7174,7 @@ float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50) -// CHECK: ret <2 x double> [[VCVT_N]]1 +// CHECK: ret <2 x double> [[VCVT_N1]] float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) { return vcvtq_n_f64_u64(a, 50); } @@ -7183,7 +7183,7 @@ float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31) -// CHECK: ret <2 x i32> [[VCVT_N]]1 +// CHECK: ret <2 x i32> [[VCVT_N1]] int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { return vcvt_n_s32_f32(a, 31); } @@ -7192,7 +7192,7 @@ int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31) -// CHECK: ret <4 x i32> [[VCVT_N]]1 +// CHECK: ret <4 x i32> [[VCVT_N1]] int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { return vcvtq_n_s32_f32(a, 31); } @@ -7201,7 +7201,7 @@ int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> // CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50) -// CHECK: ret <2 x i64> [[VCVT_N]]1 +// CHECK: ret <2 x i64> [[VCVT_N1]] int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) { return vcvtq_n_s64_f64(a, 50); } @@ -7210,7 +7210,7 @@ int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31) -// CHECK: ret <2 x i32> [[VCVT_N]]1 +// CHECK: ret <2 x i32> [[VCVT_N1]] uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) { return vcvt_n_u32_f32(a, 31); } @@ -7219,7 +7219,7 @@ uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31) -// CHECK: ret <4 x i32> [[VCVT_N]]1 +// CHECK: ret <4 x i32> [[VCVT_N1]] uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) { return vcvtq_n_u32_f32(a, 31); } @@ -7228,7 +7228,7 @@ uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> // CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50) -// CHECK: ret <2 x i64> [[VCVT_N]]1 +// CHECK: ret <2 x i64> [[VCVT_N1]] uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) { return vcvtq_n_u64_f64(a, 50); } @@ -11780,8 
+11780,8 @@ void test_vst1_p16(poly16_t *a, poly16x4_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) { @@ -11802,8 +11802,8 @@ void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> @@ -11827,8 +11827,8 @@ void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> @@ -11852,8 +11852,8 @@ void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: 
[[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> @@ -11875,8 +11875,8 @@ void test_vst2q_u64(uint64_t *a, uint64x2x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst2q_s8(int8_t *a, int8x16x2_t b) { @@ -11897,8 +11897,8 @@ void test_vst2q_s8(int8_t *a, int8x16x2_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> @@ -11922,8 +11922,8 @@ void test_vst2q_s16(int16_t *a, int16x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> @@ -11947,8 +11947,8 @@ void test_vst2q_s32(int32_t *a, int32x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = 
bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> @@ -11972,8 +11972,8 @@ void test_vst2q_s64(int64_t *a, int64x2x2_t b) { // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> @@ -11997,8 +11997,8 @@ void test_vst2q_f16(float16_t *a, float16x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> @@ -12022,8 +12022,8 @@ void test_vst2q_f32(float32_t *a, float32x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> @@ -12045,8 +12045,8 @@ void test_vst2q_f64(float64_t *a, float64x2x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* 
[[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) { @@ -12067,8 +12067,8 @@ void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> @@ -12090,8 +12090,8 @@ void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst2_u8(uint8_t *a, uint8x8x2_t b) { @@ -12112,8 +12112,8 @@ void test_vst2_u8(uint8_t *a, uint8x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> @@ -12137,8 +12137,8 @@ void test_vst2_u16(uint16_t *a, uint16x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x 
i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> @@ -12162,8 +12162,8 @@ void test_vst2_u32(uint32_t *a, uint32x2x2_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> @@ -12185,8 +12185,8 @@ void test_vst2_u64(uint64_t *a, uint64x1x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst2_s8(int8_t *a, int8x8x2_t b) { @@ -12207,8 +12207,8 @@ void test_vst2_s8(int8_t *a, int8x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> @@ -12232,8 +12232,8 @@ void test_vst2_s16(int16_t *a, int16x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x 
<2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> @@ -12257,8 +12257,8 @@ void test_vst2_s32(int32_t *a, int32x2x2_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> @@ -12282,8 +12282,8 @@ void test_vst2_s64(int64_t *a, int64x1x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> @@ -12307,8 +12307,8 @@ void test_vst2_f16(float16_t *a, float16x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> @@ -12332,8 +12332,8 @@ void test_vst2_f32(float32_t *a, float32x2x2_t b) { // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, 
align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> @@ -12355,8 +12355,8 @@ void test_vst2_f64(float64_t *a, float64x1x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst2_p8(poly8_t *a, poly8x8x2_t b) { @@ -12377,8 +12377,8 @@ void test_vst2_p8(poly8_t *a, poly8x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> @@ -12400,11 +12400,11 @@ void test_vst2_p16(poly16_t *a, poly16x4x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* 
%a) // CHECK: ret void void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) { @@ -12425,17 +12425,17 @@ void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]]) // CHECK: ret void void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) { vst3q_u16(a, b); @@ -12455,17 +12455,17 @@ void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> -// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x 
i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]]) // CHECK: ret void void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) { vst3q_u32(a, b); @@ -12485,17 +12485,17 @@ void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> -// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]]) // CHECK: ret void void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) { vst3q_u64(a, b); @@ -12513,11 +12513,11 @@ void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) // CHECK: ret void void test_vst3q_s8(int8_t *a, 
int8x16x3_t b) { @@ -12538,17 +12538,17 @@ void test_vst3q_s8(int8_t *a, int8x16x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]]) // CHECK: ret void void test_vst3q_s16(int16_t *a, int16x8x3_t b) { vst3q_s16(a, b); @@ -12568,17 +12568,17 @@ void test_vst3q_s16(int16_t *a, int16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> -// CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, i8* [[TMP2]]) +// CHECK: call void 
@llvm.aarch64.neon.st3.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i8* [[TMP2]]) // CHECK: ret void void test_vst3q_s32(int32_t *a, int32x4x3_t b) { vst3q_s32(a, b); @@ -12598,17 +12598,17 @@ void test_vst3q_s32(int32_t *a, int32x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> -// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]]) // CHECK: ret void void test_vst3q_s64(int64_t *a, int64x2x3_t b) { vst3q_s64(a, b); @@ -12628,17 +12628,17 @@ void test_vst3q_s64(int64_t *a, int64x2x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: 
[[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]]) // CHECK: ret void void test_vst3q_f16(float16_t *a, float16x8x3_t b) { vst3q_f16(a, b); @@ -12658,17 +12658,17 @@ void test_vst3q_f16(float16_t *a, float16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> -// CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP1]]0, <4 x float> [[TMP1]]1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i8* [[TMP2]]) // CHECK: ret void void test_vst3q_f32(float32_t *a, float32x4x3_t b) { vst3q_f32(a, b); @@ -12688,17 +12688,17 @@ void test_vst3q_f32(float32_t *a, float32x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], 
[3 x <2 x double>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP1]]0, <2 x double> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
  vst3q_f64(a, b);
@@ -12716,11 +12716,11 @@ void test_vst3q_f64(float64_t *a, float64x2x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
@@ -12741,17 +12741,17 @@ void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
  vst3q_p16(a, b);
@@ -12769,11 +12769,11 @@ void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
@@ -12794,17 +12794,17 @@ void test_vst3_u8(uint8_t *a, uint8x8x3_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
  vst3_u16(a, b);
@@ -12824,17 +12824,17 @@ void test_vst3_u16(uint16_t *a, uint16x4x3_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
  vst3_u32(a, b);
@@ -12854,17 +12854,17 @@ void test_vst3_u32(uint32_t *a, uint32x2x3_t b) {
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
  vst3_u64(a, b);
@@ -12882,11 +12882,11 @@ void test_vst3_u64(uint64_t *a, uint64x1x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst3_s8(int8_t *a, int8x8x3_t b) {
@@ -12907,17 +12907,17 @@ void test_vst3_s8(int8_t *a, int8x8x3_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_s16(int16_t *a, int16x4x3_t b) {
  vst3_s16(a, b);
@@ -12937,17 +12937,17 @@ void test_vst3_s16(int16_t *a, int16x4x3_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_s32(int32_t *a, int32x2x3_t b) {
  vst3_s32(a, b);
@@ -12967,17 +12967,17 @@ void test_vst3_s32(int32_t *a, int32x2x3_t b) {
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_s64(int64_t *a, int64x1x3_t b) {
  vst3_s64(a, b);
@@ -12997,17 +12997,17 @@ void test_vst3_s64(int64_t *a, int64x1x3_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_f16(float16_t *a, float16x4x3_t b) {
  vst3_f16(a, b);
@@ -13027,17 +13027,17 @@ void test_vst3_f16(float16_t *a, float16x4x3_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP1]]0, <2 x float> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_f32(float32_t *a, float32x2x3_t b) {
  vst3_f32(a, b);
@@ -13057,17 +13057,17 @@ void test_vst3_f32(float32_t *a, float32x2x3_t b) {
// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP1]]0, <1 x double> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_f64(float64_t *a, float64x1x3_t b) {
  vst3_f64(a, b);
@@ -13085,11 +13085,11 @@ void test_vst3_f64(float64_t *a, float64x1x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a)
// CHECK: ret void
void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
@@ -13110,17 +13110,17 @@ void test_vst3_p8(poly8_t *a, poly8x8x3_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_p16(a, b);
@@ -13138,14 +13138,14 @@ void test_vst3_p16(poly16_t *a, poly16x4x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
@@ -13166,22 +13166,22 @@ void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_u16(a, b);
@@ -13201,22 +13201,22 @@ void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_u32(a, b);
@@ -13236,22 +13236,22 @@ void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_u64(a, b);
@@ -13269,14 +13269,14 @@ void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
@@ -13297,22 +13297,22 @@ void test_vst4q_s8(int8_t *a, int8x16x4_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
  vst4q_s16(a, b);
@@ -13332,22 +13332,22 @@ void test_vst4q_s16(int16_t *a, int16x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
@@ -13367,22 +13367,22 @@ void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
@@ -13402,22 +13402,22 @@ void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
@@ -13437,22 +13437,22 @@ void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x float>
-// CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP1]]1, <4 x float> [[TMP1]]2, <4 x float> [[TMP1]]3, <4 x float> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
+// CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
@@ -13472,22 +13472,22 @@ void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP1]]1, <2 x double> [[TMP1]]2, <2 x double> [[TMP1]]3, <2 x double> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
+// CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
@@ -13505,14 +13505,14 @@ void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
@@ -13533,22 +13533,22 @@ void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_p16(a, b);
@@ -13566,14 +13566,14 @@ void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
@@ -13594,22 +13594,22 @@ void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
  vst4_u16(a, b);
@@ -13629,22 +13629,22 @@ void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
  vst4_u32(a, b);
@@ -13664,22 +13664,22 @@ void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
  vst4_u64(a, b);
@@ -13697,14 +13697,14 @@ void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a)
// CHECK: ret void
void test_vst4_s8(int8_t *a, int8x8x4_t b) {
@@ -13725,22 +13725,22 @@ void test_vst4_s8(int8_t *a, int8x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_s16(int16_t *a, int16x4x4_t b) {
  vst4_s16(a, b);
@@ -13760,22 +13760,22 @@ void test_vst4_s16(int16_t *a, int16x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>*
[[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x i32> -// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, i8* [[TMP2]]) +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> +// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i8* [[TMP2]]) // CHECK: ret void void test_vst4_s32(int32_t *a, int32x2x4_t b) { vst4_s32(a, b); @@ -13795,22 +13795,22 @@ void test_vst4_s32(int32_t *a, int32x2x4_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x i64> -// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> 
[[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i8* [[TMP2]]) +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> +// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]]) // CHECK: ret void void test_vst4_s64(int64_t *a, int64x1x4_t b) { vst4_s64(a, b); @@ -13830,22 +13830,22 @@ void test_vst4_s64(int64_t *a, int64x1x4_t b) { // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16> -// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i8* [[TMP2]]) +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> +// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]]) // CHECK: ret void void test_vst4_f16(float16_t *a, float16x4x4_t b) { vst4_f16(a, b); @@ -13865,22 +13865,22 @@ void test_vst4_f16(float16_t *a, float16x4x4_t b) { // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8 
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x float> -// CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP1]]1, <2 x float> [[TMP1]]2, <2 x float> [[TMP1]]3, <2 x float> [[TMP1]]4, i8* [[TMP2]]) +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> +// CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i8* [[TMP2]]) // CHECK: ret void void test_vst4_f32(float32_t *a, float32x2x4_t b) { vst4_f32(a, b); @@ -13900,22 +13900,22 @@ void test_vst4_f32(float32_t *a, float32x2x4_t b) { // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> // CHECK: 
[[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x double> -// CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP1]]1, <1 x double> [[TMP1]]2, <1 x double> [[TMP1]]3, <1 x double> [[TMP1]]4, i8* [[TMP2]]) +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> +// CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i8* [[TMP2]]) // CHECK: ret void void test_vst4_f64(float64_t *a, float64x1x4_t b) { vst4_f64(a, b); @@ -13933,14 +13933,14 @@ void test_vst4_f64(float64_t *a, float64x1x4_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 // CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) // CHECK: ret void void test_vst4_p8(poly8_t *a, poly8x8x4_t b) { @@ -13961,22 +13961,22 @@ void test_vst4_p8(poly8_t *a, poly8x8x4_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 
0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16> -// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i8* [[TMP2]]) +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> +// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i8* [[TMP2]]) // CHECK: ret void void test_vst4_p16(poly16_t *a, poly16x4x4_t b) { vst4_p16(a, b); @@ -15470,8 +15470,8 @@ poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) { @@ -15492,8 +15492,8 @@ void test_vst1q_u8_x2(uint8_t *a, uint8x16x2_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = 
getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> @@ -15518,8 +15518,8 @@ void test_vst1q_u16_x2(uint16_t *a, uint16x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> @@ -15544,8 +15544,8 @@ void test_vst1q_u32_x2(uint32_t *a, uint32x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> @@ -15568,8 +15568,8 @@ void test_vst1q_u64_x2(uint64_t *a, uint64x2x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) { @@ -15590,8 +15590,8 @@ void test_vst1q_s8_x2(int8_t *a, int8x16x2_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, 
%struct.int16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> @@ -15616,8 +15616,8 @@ void test_vst1q_s16_x2(int16_t *a, int16x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> @@ -15642,8 +15642,8 @@ void test_vst1q_s32_x2(int32_t *a, int32x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> @@ -15668,8 +15668,8 @@ void test_vst1q_s64_x2(int64_t *a, int64x2x2_t b) { // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> @@ -15694,8 +15694,8 @@ void test_vst1q_f16_x2(float16_t *a, float16x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x 
i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> @@ -15720,8 +15720,8 @@ void test_vst1q_f32_x2(float32_t *a, float32x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> @@ -15744,8 +15744,8 @@ void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) { @@ -15766,8 +15766,8 @@ void test_vst1q_p8_x2(poly8_t *a, poly8x16x2_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> @@ -15792,8 +15792,8 @@ void test_vst1q_p16_x2(poly16_t *a, 
poly16x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> @@ -15816,8 +15816,8 @@ void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) { @@ -15838,8 +15838,8 @@ void test_vst1_u8_x2(uint8_t *a, uint8x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> @@ -15864,8 +15864,8 @@ void test_vst1_u16_x2(uint16_t *a, uint16x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> @@ -15890,8 
+15890,8 @@ void test_vst1_u32_x2(uint32_t *a, uint32x2x2_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> @@ -15914,8 +15914,8 @@ void test_vst1_u64_x2(uint64_t *a, uint64x1x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) { @@ -15936,8 +15936,8 @@ void test_vst1_s8_x2(int8_t *a, int8x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> @@ -15962,8 +15962,8 @@ void test_vst1_s16_x2(int16_t *a, int16x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to 
<2 x i32> @@ -15988,8 +15988,8 @@ void test_vst1_s32_x2(int32_t *a, int32x2x2_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> @@ -16014,8 +16014,8 @@ void test_vst1_s64_x2(int64_t *a, int64x1x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> @@ -16040,8 +16040,8 @@ void test_vst1_f16_x2(float16_t *a, float16x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> @@ -16066,8 +16066,8 @@ void test_vst1_f32_x2(float32_t *a, float32x2x2_t b) { // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> // 
CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> @@ -16090,8 +16090,8 @@ void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i8* %a) // CHECK: ret void void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) { @@ -16112,8 +16112,8 @@ void test_vst1_p8_x2(poly8_t *a, poly8x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> @@ -16138,8 +16138,8 @@ void test_vst1_p16_x2(poly16_t *a, poly16x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> @@ -16162,11 +16162,11 @@ void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* 
[[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) // CHECK: ret void void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) { @@ -16187,18 +16187,18 @@ void test_vst1q_u8_x3(uint8_t *a, uint8x16x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i16* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]]) // CHECK: ret void void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) { vst1q_u16_x3(a, b); @@ -16218,18 +16218,18 @@ void test_vst1q_u16_x3(uint16_t *a, uint16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 -// 
CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, i32* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]]) // CHECK: ret void void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) { vst1q_u32_x3(a, b); @@ -16249,18 +16249,18 @@ void test_vst1q_u32_x3(uint32_t *a, uint32x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i64* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]]) // CHECK: ret void void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) { vst1q_u64_x3(a, b); @@ -16278,11 +16278,11 @@ void test_vst1q_u64_x3(uint64_t *a, uint64x2x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* 
[[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) // CHECK: ret void void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) { @@ -16303,18 +16303,18 @@ void test_vst1q_s8_x3(int8_t *a, int8x16x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i16* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]]) // CHECK: ret void void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) { vst1q_s16_x3(a, b); @@ -16334,18 +16334,18 @@ void test_vst1q_s16_x3(int16_t *a, int16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: 
[[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, i32* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i32* [[TMP12]]) // CHECK: ret void void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) { vst1q_s32_x3(a, b); @@ -16365,18 +16365,18 @@ void test_vst1q_s32_x3(int32_t *a, int32x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i64* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]]) // CHECK: ret void void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) { vst1q_s64_x3(a, b); @@ -16396,18 +16396,18 @@ void test_vst1q_s64_x3(int64_t *a, int64x2x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr 
inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i16* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]]) // CHECK: ret void void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) { vst1q_f16_x3(a, b); @@ -16427,18 +16427,18 @@ void test_vst1q_f16_x3(float16_t *a, float16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float* -// CHECK: call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> [[TMP9]], <4 x float> [[TMP1]]0, <4 x float> [[TMP1]]1, float* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], float* [[TMP12]]) // CHECK: ret void void 
test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) { vst1q_f32_x3(a, b); @@ -16458,18 +16458,18 @@ void test_vst1q_f32_x3(float32_t *a, float32x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double* -// CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP1]]0, <2 x double> [[TMP1]]1, double* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], double* [[TMP12]]) // CHECK: ret void void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) { vst1q_f64_x3(a, b); @@ -16487,11 +16487,11 @@ void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i8* %a) // CHECK: ret void void test_vst1q_p8_x3(poly8_t *a, 
poly8x16x3_t b) { @@ -16512,18 +16512,18 @@ void test_vst1q_p8_x3(poly8_t *a, poly8x16x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i16* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i16* [[TMP12]]) // CHECK: ret void void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) { vst1q_p16_x3(a, b); @@ -16543,18 +16543,18 @@ void test_vst1q_p16_x3(poly16_t *a, poly16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* -// 
CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i64* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64* [[TMP12]]) // CHECK: ret void void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) { vst1q_p64_x3(a, b); @@ -16572,11 +16572,11 @@ void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 // CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) // CHECK: ret void void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) { @@ -16597,18 +16597,18 @@ void test_vst1_u8_x3(uint8_t *a, uint8x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i16* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x 
i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]]) // CHECK: ret void void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) { vst1_u16_x3(a, b); @@ -16628,18 +16628,18 @@ void test_vst1_u16_x3(uint16_t *a, uint16x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, i32* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]]) // CHECK: ret void void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) { vst1_u32_x3(a, b); @@ -16659,18 +16659,18 @@ void test_vst1_u32_x3(uint32_t *a, uint32x2x3_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> // CHECK: [[TMP11:%.*]] 
= bitcast <8 x i8> [[TMP8]] to <1 x i64> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i64* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]]) // CHECK: ret void void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) { vst1_u64_x3(a, b); @@ -16688,11 +16688,11 @@ void test_vst1_u64_x3(uint64_t *a, uint64x1x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 // CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) // CHECK: ret void void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) { @@ -16713,18 +16713,18 @@ void test_vst1_s8_x3(int8_t *a, int8x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i16* 
[[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]]) // CHECK: ret void void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) { vst1_s16_x3(a, b); @@ -16744,18 +16744,18 @@ void test_vst1_s16_x3(int16_t *a, int16x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, i32* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i32* [[TMP12]]) // CHECK: ret void void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) { vst1_s32_x3(a, b); @@ -16775,18 +16775,18 @@ void test_vst1_s32_x3(int32_t *a, int32x2x3_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: 
[[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i64* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]]) // CHECK: ret void void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) { vst1_s64_x3(a, b); @@ -16806,18 +16806,18 @@ void test_vst1_s64_x3(int64_t *a, int64x1x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i16* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]]) // CHECK: ret void void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) { vst1_f16_x3(a, b); @@ -16837,18 +16837,18 @@ void test_vst1_f16_x3(float16_t *a, float16x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = 
load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to float* -// CHECK: call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> [[TMP9]], <2 x float> [[TMP1]]0, <2 x float> [[TMP1]]1, float* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], float* [[TMP12]]) // CHECK: ret void void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) { vst1_f32_x3(a, b); @@ -16868,18 +16868,18 @@ void test_vst1_f32_x3(float32_t *a, float32x2x3_t b) { // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to double* -// CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP1]]0, <1 x double> [[TMP1]]1, double* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], double* [[TMP12]]) // CHECK: ret void void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) { vst1_f64_x3(a, b); @@ -16897,11 +16897,11 @@ void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// 
CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 // CHECK: call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i8* %a) // CHECK: ret void void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) { @@ -16922,18 +16922,18 @@ void test_vst1_p8_x3(poly8_t *a, poly8x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i16* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i16* [[TMP12]]) // CHECK: ret void void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) { vst1_p16_x3(a, b); @@ -16953,18 +16953,18 @@ void test_vst1_p16_x3(poly16_t *a, poly16x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // 
CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> // CHECK: [[TMP12:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i64* [[TMP1]]2) +// CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64* [[TMP12]]) // CHECK: ret void void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) { vst1_p64_x3(a, b); @@ -16982,14 +16982,14 @@ void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 // CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) // CHECK: ret void void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) { @@ -17010,23 +17010,23 @@ void test_vst1q_u8_x4(uint8_t *a, uint8x16x4_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 
1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16> +// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i16* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]]) // CHECK: ret void void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) { vst1q_u16_x4(a, b); @@ -17046,23 +17046,23 @@ void test_vst1q_u16_x4(uint16_t *a, uint16x8x4_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> 
[[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x i32> +// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, i32* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]]) // CHECK: ret void void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) { vst1q_u32_x4(a, b); @@ -17082,23 +17082,23 @@ void test_vst1q_u32_x4(uint32_t *a, uint32x4x4_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64> +// CHECK: 
[[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i64* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]]) // CHECK: ret void void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) { vst1q_u64_x4(a, b); @@ -17116,14 +17116,14 @@ void test_vst1q_u64_x4(uint64_t *a, uint64x2x4_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 // CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) // CHECK: ret void void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) { @@ -17144,23 +17144,23 @@ void test_vst1q_s8_x4(int8_t *a, int8x16x4_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16> +// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i16* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]]) // CHECK: ret void void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) { vst1q_s16_x4(a, b); @@ -17180,23 +17180,23 @@ void test_vst1q_s16_x4(int16_t *a, int16x8x4_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: 
[[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x i32> +// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, i32* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i32* [[TMP15]]) // CHECK: ret void void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) { vst1q_s32_x4(a, b); @@ -17216,23 +17216,23 @@ void test_vst1q_s32_x4(int32_t *a, int32x4x4_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64> +// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i64* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]]) // CHECK: ret void void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) { vst1q_s64_x4(a, b); @@ -17252,23 +17252,23 @@ void test_vst1q_s64_x4(int64_t *a, int64x2x4_t b) { // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // 
CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16> +// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i16* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]]) // CHECK: ret void void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) { vst1q_f16_x4(a, b); @@ -17288,23 +17288,23 @@ void test_vst1q_f16_x4(float16_t *a, float16x8x4_t b) { // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x 
float>], [4 x <4 x float>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x float> +// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float* -// CHECK: call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> [[TMP1]]1, <4 x float> [[TMP1]]2, <4 x float> [[TMP1]]3, <4 x float> [[TMP1]]4, float* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], float* [[TMP15]]) // CHECK: ret void void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) { vst1q_f32_x4(a, b); @@ -17324,23 +17324,23 @@ void test_vst1q_f32_x4(float32_t *a, float32x4x4_t b) { // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x double> +// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double* -// CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP1]]1, <2 x double> [[TMP1]]2, <2 x double> [[TMP1]]3, <2 x double> [[TMP1]]4, double* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], double* [[TMP15]]) // CHECK: ret void void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) { vst1q_f64_x4(a, b); @@ -17358,14 +17358,14 @@ void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 // CHECK: call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i8* %a) // CHECK: ret void void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) { @@ -17386,23 +17386,23 @@ void test_vst1q_p8_x4(poly8_t *a, poly8x16x4_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, 
align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16> +// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i16* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i16* [[TMP15]]) // CHECK: ret void void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) { vst1q_p16_x4(a, b); @@ -17422,23 +17422,23 @@ void test_vst1q_p16_x4(poly16_t *a, poly16x8x4_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds 
%struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> -// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64> +// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i64* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64* [[TMP15]]) // CHECK: ret void void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) { vst1q_p64_x4(a, b); @@ -17456,14 +17456,14 @@ void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) // CHECK: ret void void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) { @@ -17484,23 +17484,23 @@ void test_vst1_u8_x4(uint8_t *a, uint8x8x4_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i16* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) // CHECK: ret void void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) { vst1_u16_x4(a, b); @@ -17520,23 +17520,23 @@ void test_vst1_u16_x4(uint16_t *a, uint16x4x4_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x 
i32>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x i32> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, i32* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]]) // CHECK: ret void void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) { vst1_u32_x4(a, b); @@ -17556,23 +17556,23 @@ void test_vst1_u32_x4(uint32_t *a, uint32x2x4_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> 
[[TMP1]]0 to <1 x i64> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i64* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]]) // CHECK: ret void void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) { vst1_u64_x4(a, b); @@ -17590,14 +17590,14 @@ void test_vst1_u64_x4(uint64_t *a, uint64x1x4_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) // CHECK: ret void void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) { @@ -17618,23 +17618,23 @@ void test_vst1_s8_x4(int8_t *a, int8x8x4_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr 
inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i16* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) // CHECK: ret void void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) { vst1_s16_x4(a, b); @@ -17654,23 +17654,23 @@ void test_vst1_s16_x4(int16_t *a, int16x4x4_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // 
CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x i32> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i32* -// CHECK: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, i32* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i32* [[TMP15]]) // CHECK: ret void void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) { vst1_s32_x4(a, b); @@ -17690,23 +17690,23 @@ void test_vst1_s32_x4(int32_t *a, int32x2x4_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x i64> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i64* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]]) // CHECK: ret void void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) { vst1_s64_x4(a, b); @@ -17726,23 +17726,23 @@ void test_vst1_s64_x4(int64_t *a, int64x1x4_t b) { // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] 
= getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i16* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) // CHECK: ret void void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) { vst1_f16_x4(a, b); @@ -17762,23 +17762,23 @@ void test_vst1_f16_x4(float16_t *a, float16x4x4_t b) { // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* 
[[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x float> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to float* -// CHECK: call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> [[TMP1]]1, <2 x float> [[TMP1]]2, <2 x float> [[TMP1]]3, <2 x float> [[TMP1]]4, float* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], float* [[TMP15]]) // CHECK: ret void void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) { vst1_f32_x4(a, b); @@ -17798,23 +17798,23 @@ void test_vst1_f32_x4(float32_t *a, float32x2x4_t b) { // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <1 x double>, 
<1 x double>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x double> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to double* -// CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP1]]1, <1 x double> [[TMP1]]2, <1 x double> [[TMP1]]3, <1 x double> [[TMP1]]4, double* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], double* [[TMP15]]) // CHECK: ret void void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) { vst1_f64_x4(a, b); @@ -17832,14 +17832,14 @@ void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 // CHECK: call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i8* %a) // CHECK: ret void void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) { @@ -17860,23 +17860,23 @@ void test_vst1_p8_x4(poly8_t *a, poly8x8x4_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i16* -// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i16* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i16* [[TMP15]]) // CHECK: ret void void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) { vst1_p16_x4(a, b); @@ -17896,23 +17896,23 @@ void test_vst1_p16_x4(poly16_t *a, poly16x4x4_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <1 x i64>, 
<1 x i64>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x i64> +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> // CHECK: [[TMP15:%.*]] = bitcast i8* [[TMP2]] to i64* -// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i64* [[TMP1]]5) +// CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64* [[TMP15]]) // CHECK: ret void void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) { vst1_p64_x4(a, b); @@ -18609,7 +18609,7 @@ int64_t test_vrshrd_n_s64(int64_t a) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>) -// CHECK: ret <1 x i64> [[VRSHR_N]]1 +// CHECK: ret <1 x i64> [[VRSHR_N1]] int64x1_t test_vrshr_n_s64(int64x1_t a) { return vrshr_n_s64(a, 1); } @@ -18625,7 +18625,7 @@ uint64_t test_vrshrd_n_u64(uint64_t a) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>) -// CHECK: ret <1 x i64> [[VRSHR_N]]1 +// CHECK: ret <1 x i64> [[VRSHR_N1]] uint64x1_t test_vrshr_n_u64(uint64x1_t a) { return vrshr_n_u64(a, 1); } @@ -18691,7 +18691,7 @@ int64_t test_vrsrad_n_s64(int64_t a, int64_t b) { // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N]]1 +// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <1 x i64> [[TMP3]] int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { return vrsra_n_s64(a, b, 1); @@ -18711,7 +18711,7 @@ uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) { // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>) // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N]]1 +// CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]] // CHECK: ret <1 x i64> [[TMP3]] uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { return vrsra_n_u64(a, b, 1); @@ -18798,7 +18798,7 @@ int8x16_t test_vqshlq_n_s8(int8x16_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer) -// CHECK: ret <4 x i16> [[VQSHL_N]]1 +// CHECK: ret <4 x i16> [[VQSHL_N1]] int16x4_t test_vqshl_n_s16(int16x4_t a) { return vqshl_n_s16(a, 0); }
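For context on the pattern repeated throughout this patch: in FileCheck syntax, [[NAME]] expands to the text previously captured for NAME, so a pattern like [[TMP1]]0 matches TMP1's capture followed by a literal 0; it is not a reference to the variable TMP10. A minimal sketch with hypothetical captures (not drawn from the tests in this file):

// Suppose the generated IR defines %x and later %x1, captured as:
// CHECK: [[X:%.*]] = load i64, i64* %p
// CHECK: [[X1:%.*]] = add i64 [[X]], 1
//
// A later reference then behaves as follows:
// CHECK: ret i64 [[X1]]
//   expands to X1's capture, "%x1" -- the intended SSA value.
// CHECK: ret i64 [[X]]1
//   expands to X's capture plus a literal "1"; that happens to be "%x1"
//   here, but it is the wrong reference, and it fails whenever the
//   earlier capture (say "%x.i") is not a textual prefix of the later name.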
@@ -18807,7 +18807,7 @@ int16x4_t test_vqshl_n_s16(int16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer) -// CHECK: ret <8 x i16> [[VQSHL_N]]1 +// CHECK: ret <8 x i16> [[VQSHL_N1]] int16x8_t test_vqshlq_n_s16(int16x8_t a) { return vqshlq_n_s16(a, 0); } @@ -18816,7 +18816,7 @@ int16x8_t test_vqshlq_n_s16(int16x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer) -// CHECK: ret <2 x i32> [[VQSHL_N]]1 +// CHECK: ret <2 x i32> [[VQSHL_N1]] int32x2_t test_vqshl_n_s32(int32x2_t a) { return vqshl_n_s32(a, 0); } @@ -18825,7 +18825,7 @@ int32x2_t test_vqshl_n_s32(int32x2_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer) -// CHECK: ret <4 x i32> [[VQSHL_N]]1 +// CHECK: ret <4 x i32> [[VQSHL_N1]] int32x4_t test_vqshlq_n_s32(int32x4_t a) { return vqshlq_n_s32(a, 0); } @@ -18834,7 +18834,7 @@ int32x4_t test_vqshlq_n_s32(int32x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer) -// CHECK: ret <2 x i64> [[VQSHL_N]]1 +// CHECK: ret <2 x i64> [[VQSHL_N1]] int64x2_t test_vqshlq_n_s64(int64x2_t a) { return vqshlq_n_s64(a, 0); } @@ -18857,7 +18857,7 @@ uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer) -// CHECK: ret <4 x i16> [[VQSHL_N]]1 +// CHECK: ret <4 x i16> [[VQSHL_N1]] uint16x4_t test_vqshl_n_u16(uint16x4_t a) { return vqshl_n_u16(a, 0); } @@ -18866,7 +18866,7 @@ uint16x4_t test_vqshl_n_u16(uint16x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer) -// CHECK: ret <8 x i16> [[VQSHL_N]]1 +// CHECK: ret <8 x i16> [[VQSHL_N1]] uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { return vqshlq_n_u16(a, 0); } @@ -18875,7 +18875,7 @@ uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer) -// CHECK: ret <2 x i32> [[VQSHL_N]]1 +// CHECK: ret <2 x i32> [[VQSHL_N1]] uint32x2_t test_vqshl_n_u32(uint32x2_t a) { return vqshl_n_u32(a, 0); } @@ -18884,7 +18884,7 @@ uint32x2_t test_vqshl_n_u32(uint32x2_t a) { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> 
[[VQSHL_N]], <4 x i32> zeroinitializer) -// CHECK: ret <4 x i32> [[VQSHL_N]]1 +// CHECK: ret <4 x i32> [[VQSHL_N1]] uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { return vqshlq_n_u32(a, 0); } @@ -18893,7 +18893,7 @@ uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer) -// CHECK: ret <2 x i64> [[VQSHL_N]]1 +// CHECK: ret <2 x i64> [[VQSHL_N1]] uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { return vqshlq_n_u64(a, 0); } @@ -18902,7 +18902,7 @@ uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>) -// CHECK: ret <1 x i64> [[VQSHL_N]]1 +// CHECK: ret <1 x i64> [[VQSHL_N1]] int64x1_t test_vqshl_n_s64(int64x1_t a) { return vqshl_n_s64(a, 1); } @@ -18943,7 +18943,7 @@ uint64_t test_vqshld_n_u64(uint64_t a) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>) -// CHECK: ret <1 x i64> [[VQSHL_N]]1 +// CHECK: ret <1 x i64> [[VQSHL_N1]] uint64x1_t test_vqshl_n_u64(uint64x1_t a) { return vqshl_n_u64(a, 1); } @@ -18984,7 +18984,7 @@ int64_t test_vqshlud_n_s64(int64_t a) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>) -// CHECK: ret <1 x i64> [[VQSHLU_N]]1 +// CHECK: ret <1 x i64> [[VQSHLU_N1]] uint64x1_t test_vqshlu_n_s64(int64x1_t a) { return vqshlu_n_s64(a, 1); } @@ -18992,9 +18992,9 @@ uint64x1_t test_vqshlu_n_s64(int64x1_t a) { // CHECK-LABEL: define i64 @test_vsrid_n_s64(i64 %a, i64 %b) #0 { // CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64> // CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64> -// CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S64]]1, i32 63) -// CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S64]]2 to i64 -// CHECK: ret i64 [[VSRID_N_S64]]3 +// CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63) +// CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64 +// CHECK: ret i64 [[VSRID_N_S643]] int64_t test_vsrid_n_s64(int64_t a, int64_t b) { return (int64_t)vsrid_n_s64(a, b, 63); } @@ -19004,8 +19004,8 @@ int64_t test_vsrid_n_s64(int64_t a, int64_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N]]1, i32 1) -// CHECK: ret <1 x i64> [[VSRI_N]]2 +// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1) +// CHECK: ret <1 x i64> [[VSRI_N2]] int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) { return vsri_n_s64(a, b, 1); } @@ -19013,9 
+19013,9 @@ int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) { // CHECK-LABEL: define i64 @test_vsrid_n_u64(i64 %a, i64 %b) #0 { // CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64> // CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64> -// CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U64]]1, i32 63) -// CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U64]]2 to i64 -// CHECK: ret i64 [[VSRID_N_U64]]3 +// CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63) +// CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64 +// CHECK: ret i64 [[VSRID_N_U643]] uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) { return (uint64_t)vsrid_n_u64(a, b, 63); } @@ -19025,8 +19025,8 @@ uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N]]1, i32 1) -// CHECK: ret <1 x i64> [[VSRI_N]]2 +// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1) +// CHECK: ret <1 x i64> [[VSRI_N2]] uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) { return vsri_n_u64(a, b, 1); } @@ -19034,9 +19034,9 @@ uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) { // CHECK-LABEL: define i64 @test_vslid_n_s64(i64 %a, i64 %b) #0 { // CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64> // CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64> -// CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S64]]1, i32 63) -// CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S64]]2 to i64 -// CHECK: ret i64 [[VSLID_N_S64]]3 +// CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63) +// CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64 +// CHECK: ret i64 [[VSLID_N_S643]] int64_t test_vslid_n_s64(int64_t a, int64_t b) { return (int64_t)vslid_n_s64(a, b, 63); } @@ -19046,8 +19046,8 @@ int64_t test_vslid_n_s64(int64_t a, int64_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N]]1, i32 1) -// CHECK: ret <1 x i64> [[VSLI_N]]2 +// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1) +// CHECK: ret <1 x i64> [[VSLI_N2]] int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { return vsli_n_s64(a, b, 1); } @@ -19055,9 +19055,9 @@ int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { // CHECK-LABEL: define i64 @test_vslid_n_u64(i64 %a, i64 %b) #0 { // CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64> // CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64> -// CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U64]]1, i32 63) -// CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U64]]2 to 
i64 -// CHECK: ret i64 [[VSLID_N_U64]]3 +// CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63) +// CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64 +// CHECK: ret i64 [[VSLID_N_U643]] uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) { return (uint64_t)vslid_n_u64(a, b, 63); } @@ -19067,8 +19067,8 @@ uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) { // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N]]1, i32 1) -// CHECK: ret <1 x i64> [[VSLI_N]]2 +// CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1) +// CHECK: ret <1 x i64> [[VSLI_N2]] uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) { return vsli_n_u64(a, b, 1); } @@ -22336,7 +22336,7 @@ float64x1_t test_vcvt_f64_u64(uint64x1_t a) { // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> // CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64) -// CHECK: ret <1 x i64> [[VCVT_N]]1 +// CHECK: ret <1 x i64> [[VCVT_N1]] int64x1_t test_vcvt_n_s64_f64(float64x1_t a) { return vcvt_n_s64_f64(a, 64); } @@ -22345,7 +22345,7 @@ int64x1_t test_vcvt_n_s64_f64(float64x1_t a) { // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> // CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64) -// CHECK: ret <1 x i64> [[VCVT_N]]1 +// CHECK: ret <1 x i64> [[VCVT_N1]] uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) { return vcvt_n_u64_f64(a, 64); } @@ -22354,7 +22354,7 @@ uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64) -// CHECK: ret <1 x double> [[VCVT_N]]1 +// CHECK: ret <1 x double> [[VCVT_N1]] float64x1_t test_vcvt_n_f64_s64(int64x1_t a) { return vcvt_n_f64_s64(a, 64); } @@ -22363,7 +22363,7 @@ float64x1_t test_vcvt_n_f64_s64(int64x1_t a) { // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64) -// CHECK: ret <1 x double> [[VCVT_N]]1 +// CHECK: ret <1 x double> [[VCVT_N1]] float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) { return vcvt_n_f64_u64(a, 64); } diff --git a/test/CodeGen/aarch64-neon-ldst-one.c b/test/CodeGen/aarch64-neon-ldst-one.c index e904dcb1d0..25bd797b92 100644 --- a/test/CodeGen/aarch64-neon-ldst-one.c +++ b/test/CodeGen/aarch64-neon-ldst-one.c @@ -2135,8 +2135,8 @@ poly64x1_t test_vld1_lane_p64(poly64_t *a, poly64x1_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0 -// 
CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %ptr) // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8> }* // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] @@ -2164,8 +2164,8 @@ int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %ptr) // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8> }* // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] @@ -2193,8 +2193,8 @@ uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %ptr) // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8> }* // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2_LANE]], { <16 x i8>, <16 x i8> }* [[TMP5]] @@ -2222,11 +2222,11 @@ poly8x16x2_t test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], 
i64 0, i64 1 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %ptr) // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8> }* // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] @@ -2254,11 +2254,11 @@ int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %ptr) // CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8> }* // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]] @@ -2288,19 +2288,19 @@ uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) { // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> // CHECK: 
[[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP1]]0 +// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint16x8x2_t [[TMP1]]3 +// CHECK: ret %struct.uint16x8x2_t [[TMP13]] uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) { return vld2q_lane_u16(a, b, 7); } @@ -2322,19 +2322,19 @@ uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) { // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i8(<4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i64 3, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP1]]0 +// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint32x4x2_t [[TMP1]]3 +// CHECK: ret %struct.uint32x4x2_t [[TMP13]] uint32x4x2_t test_vld2q_lane_u32(uint32_t *a, uint32x4x2_t b) { return vld2q_lane_u32(a, b, 3); } @@ -2356,19 +2356,19 @@ uint32x4x2_t test_vld2q_lane_u32(uint32_t *a, uint32x4x2_t b) { // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, 
align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i8(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64> }* -// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP1]]0 +// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint64x2x2_t [[TMP1]]3 +// CHECK: ret %struct.uint64x2x2_t [[TMP13]] uint64x2x2_t test_vld2q_lane_u64(uint64_t *a, uint64x2x2_t b) { return vld2q_lane_u64(a, b, 1); } @@ -2390,19 +2390,19 @@ uint64x2x2_t test_vld2q_lane_u64(uint64_t *a, uint64x2x2_t b) { // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP1]]0 +// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int16x8x2_t [[TMP1]]3 +// CHECK: ret %struct.int16x8x2_t [[TMP13]] int16x8x2_t test_vld2q_lane_s16(int16_t *a, int16x8x2_t b) { return vld2q_lane_s16(a, b, 7); } @@ -2424,19 +2424,19 @@ int16x8x2_t test_vld2q_lane_s16(int16_t *a, int16x8x2_t b) { // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x 
i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i8(<4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i64 3, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP1]]0 +// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2_LANE]], { <4 x i32>, <4 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int32x4x2_t [[TMP1]]3 +// CHECK: ret %struct.int32x4x2_t [[TMP13]] int32x4x2_t test_vld2q_lane_s32(int32_t *a, int32x4x2_t b) { return vld2q_lane_s32(a, b, 3); } @@ -2458,19 +2458,19 @@ int32x4x2_t test_vld2q_lane_s32(int32_t *a, int32x4x2_t b) { // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i8(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64> }* -// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP1]]0 +// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load 
%struct.int64x2x2_t, %struct.int64x2x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int64x2x2_t [[TMP1]]3 +// CHECK: ret %struct.int64x2x2_t [[TMP13]] int64x2x2_t test_vld2q_lane_s64(int64_t *a, int64x2x2_t b) { return vld2q_lane_s64(a, b, 1); } @@ -2492,19 +2492,19 @@ int64x2x2_t test_vld2q_lane_s64(int64_t *a, int64x2x2_t b) { // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP1]]0 +// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float16x8x2_t, %struct.float16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float16x8x2_t [[TMP1]]3 +// CHECK: ret %struct.float16x8x2_t [[TMP13]] float16x8x2_t test_vld2q_lane_f16(float16_t *a, float16x8x2_t b) { return vld2q_lane_f16(a, b, 7); } @@ -2526,19 +2526,19 @@ float16x8x2_t test_vld2q_lane_f16(float16_t *a, float16x8x2_t b) { // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> // CHECK: [[VLD2_LANE:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0i8(<4 x float> [[TMP8]], <4 x float> [[TMP9]], i64 3, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float> } [[VLD2_LANE]], { <4 x float>, <4 x float> }* [[TMP1]]0 +// CHECK: 
store { <4 x float>, <4 x float> } [[VLD2_LANE]], { <4 x float>, <4 x float> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float32x4x2_t [[TMP1]]3 +// CHECK: ret %struct.float32x4x2_t [[TMP13]] float32x4x2_t test_vld2q_lane_f32(float32_t *a, float32x4x2_t b) { return vld2q_lane_f32(a, b, 3); } @@ -2560,19 +2560,19 @@ float32x4x2_t test_vld2q_lane_f32(float32_t *a, float32x4x2_t b) { // CHECK: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double> // CHECK: [[VLD2_LANE:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0i8(<2 x double> [[TMP8]], <2 x double> [[TMP9]], i64 1, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x double>, <2 x double> }* -// CHECK: store { <2 x double>, <2 x double> } [[VLD2_LANE]], { <2 x double>, <2 x double> }* [[TMP1]]0 +// CHECK: store { <2 x double>, <2 x double> } [[VLD2_LANE]], { <2 x double>, <2 x double> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float64x2x2_t, %struct.float64x2x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float64x2x2_t [[TMP1]]3 +// CHECK: ret %struct.float64x2x2_t [[TMP13]] float64x2x2_t test_vld2q_lane_f64(float64_t *a, float64x2x2_t b) { return vld2q_lane_f64(a, b, 1); } @@ -2594,19 +2594,19 @@ float64x2x2_t test_vld2q_lane_f64(float64_t *a, float64x2x2_t b) { // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: 
[[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i8(<8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i64 7, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP1]]0 +// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2_LANE]], { <8 x i16>, <8 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly16x8x2_t [[TMP1]]3 +// CHECK: ret %struct.poly16x8x2_t [[TMP13]] poly16x8x2_t test_vld2q_lane_p16(poly16_t *a, poly16x8x2_t b) { return vld2q_lane_p16(a, b, 7); } @@ -2628,19 +2628,19 @@ poly16x8x2_t test_vld2q_lane_p16(poly16_t *a, poly16x8x2_t b) { // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i8(<2 x i64> [[TMP8]], <2 x i64> [[TMP9]], i64 1, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64> }* -// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP1]]0 +// CHECK: store { <2 x i64>, <2 x i64> } [[VLD2_LANE]], { <2 x i64>, <2 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 32, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 32, i32 16, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly64x2x2_t [[TMP1]]3 +// CHECK: ret %struct.poly64x2x2_t [[TMP13]] poly64x2x2_t test_vld2q_lane_p64(poly64_t *a, poly64x2x2_t b) { return vld2q_lane_p64(a, b, 1); } @@ -2660,8 +2660,8 @@ poly64x2x2_t test_vld2q_lane_p64(poly64_t *a, poly64x2x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds 
%struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a) // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8> }* // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] @@ -2691,19 +2691,19 @@ uint8x8x2_t test_vld2_lane_u8(uint8_t *a, uint8x8x2_t b) { // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP1]]0 +// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint16x4x2_t [[TMP1]]3 +// CHECK: ret %struct.uint16x4x2_t [[TMP13]] uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) { return vld2_lane_u16(a, b, 3); } @@ -2725,19 +2725,19 @@ uint16x4x2_t test_vld2_lane_u16(uint16_t *a, uint16x4x2_t b) { // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> // CHECK: 
[[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i8(<2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 1, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP1]]0 +// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint32x2x2_t [[TMP1]]3 +// CHECK: ret %struct.uint32x2x2_t [[TMP13]] uint32x2x2_t test_vld2_lane_u32(uint32_t *a, uint32x2x2_t b) { return vld2_lane_u32(a, b, 1); } @@ -2759,19 +2759,19 @@ uint32x2x2_t test_vld2_lane_u32(uint32_t *a, uint32x2x2_t b) { // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> // CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP1]]0 +// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.uint64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint64x1x2_t [[TMP1]]3 +// CHECK: ret %struct.uint64x1x2_t [[TMP13]] uint64x1x2_t test_vld2_lane_u64(uint64_t *a, uint64x1x2_t b) { return vld2_lane_u64(a, b, 0); } @@ -2791,8 +2791,8 @@ uint64x1x2_t test_vld2_lane_u64(uint64_t *a, uint64x1x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 
1 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a) // CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8> }* // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]] @@ -2822,19 +2822,19 @@ int8x8x2_t test_vld2_lane_s8(int8_t *a, int8x8x2_t b) { // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP1]]0 +// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int16x4x2_t [[TMP1]]3 +// CHECK: ret %struct.int16x4x2_t [[TMP13]] int16x4x2_t test_vld2_lane_s16(int16_t *a, int16x4x2_t b) { return vld2_lane_s16(a, b, 3); } @@ -2856,19 +2856,19 @@ int16x4x2_t test_vld2_lane_s16(int16_t *a, int16x4x2_t b) { // CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32> // CHECK: [[VLD2_LANE:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i8(<2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i64 
1, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP1]]0 +// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE]], { <2 x i32>, <2 x i32> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int32x2x2_t [[TMP1]]3 +// CHECK: ret %struct.int32x2x2_t [[TMP13]] int32x2x2_t test_vld2_lane_s32(int32_t *a, int32x2x2_t b) { return vld2_lane_s32(a, b, 1); } @@ -2890,19 +2890,19 @@ int32x2x2_t test_vld2_lane_s32(int32_t *a, int32x2x2_t b) { // CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> // CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP1]]0 +// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.int64x1x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.int64x1x2_t, %struct.int64x1x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int64x1x2_t [[TMP1]]3 +// CHECK: ret %struct.int64x1x2_t [[TMP13]] int64x1x2_t test_vld2_lane_s64(int64_t *a, int64x1x2_t b) { return vld2_lane_s64(a, b, 0); } @@ -2924,19 +2924,19 @@ int64x1x2_t test_vld2_lane_s64(int64_t *a, int64x1x2_t b) { // CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 // CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x 
half>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> // CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP1]]0 +// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float16x4x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float16x4x2_t, %struct.float16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float16x4x2_t [[TMP1]]3 +// CHECK: ret %struct.float16x4x2_t [[TMP13]] float16x4x2_t test_vld2_lane_f16(float16_t *a, float16x4x2_t b) { return vld2_lane_f16(a, b, 3); } @@ -2958,19 +2958,19 @@ float16x4x2_t test_vld2_lane_f16(float16_t *a, float16x4x2_t b) { // CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 // CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float> // CHECK: [[VLD2_LANE:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0i8(<2 x float> [[TMP8]], <2 x float> [[TMP9]], i64 1, i8* [[TMP3]]) // CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE]], { <2 x float>, <2 x float> }* [[TMP1]]0 +// CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE]], { <2 x float>, <2 x float> }* [[TMP10]] // CHECK: [[TMP11:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL]] to i8* // CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false) // CHECK: [[TMP13:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float32x2x2_t [[TMP1]]3 +// CHECK: ret %struct.float32x2x2_t [[TMP13]] float32x2x2_t test_vld2_lane_f32(float32_t *a, float32x2x2_t b) { return vld2_lane_f32(a, b, 1); } @@ -2992,19 +2992,19 @@ float32x2x2_t test_vld2_lane_f32(float32_t *a, float32x2x2_t b) { // CHECK: [[TMP4:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 // CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x 
i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double>
// CHECK: [[VLD2_LANE:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0i8(<1 x double> [[TMP8]], <1 x double> [[TMP9]], i64 0, i8* [[TMP3]])
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <1 x double>, <1 x double> }*
-// CHECK: store { <1 x double>, <1 x double> } [[VLD2_LANE]], { <1 x double>, <1 x double> }* [[TMP1]]0
+// CHECK: store { <1 x double>, <1 x double> } [[VLD2_LANE]], { <1 x double>, <1 x double> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.float64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.float64x1x2_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
// CHECK: [[TMP13:%.*]] = load %struct.float64x1x2_t, %struct.float64x1x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x2_t [[TMP1]]3
+// CHECK: ret %struct.float64x1x2_t [[TMP13]]
float64x1x2_t test_vld2_lane_f64(float64_t *a, float64x1x2_t b) {
  return vld2_lane_f64(a, b, 0);
}
@@ -3024,8 +3024,8 @@ float64x1x2_t test_vld2_lane_f64(float64_t *a, float64x1x2_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a)
// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE]], { <8 x i8>, <8 x i8> }* [[TMP5]]
@@ -3055,19 +3055,19 @@ poly8x8x2_t test_vld2_lane_p8(poly8_t *a, poly8x8x2_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[VLD2_LANE:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i8(<4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16> }*
-// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP1]]0
+// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE]], { <4 x i16>, <4 x i16> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
// CHECK: [[TMP13:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x2_t [[TMP1]]3
+// CHECK: ret %struct.poly16x4x2_t [[TMP13]]
poly16x4x2_t test_vld2_lane_p16(poly16_t *a, poly16x4x2_t b) {
  return vld2_lane_p16(a, b, 3);
}
@@ -3089,19 +3089,19 @@ poly16x4x2_t test_vld2_lane_p16(poly16_t *a, poly16x4x2_t b) {
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[VLD2_LANE:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i8(<1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i64 0, i8* [[TMP3]])
// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64> }*
-// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP1]]0
+// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_LANE]], { <1 x i64>, <1 x i64> }* [[TMP10]]
// CHECK: [[TMP11:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
// CHECK: [[TMP12:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]1, i8* [[TMP1]]2, i64 16, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP11]], i8* [[TMP12]], i64 16, i32 8, i1 false)
// CHECK: [[TMP13:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x2_t [[TMP1]]3
+// CHECK: ret %struct.poly64x1x2_t [[TMP13]]
poly64x1x2_t test_vld2_lane_p64(poly64_t *a, poly64x1x2_t b) {
  return vld2_lane_p64(a, b, 0);
}
@@ -3123,24 +3123,24 @@ poly64x1x2_t test_vld2_lane_p64(poly64_t *a, poly64x1x2_t b) {
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, i64 7, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]3
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.uint16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.uint16x8x3_t [[TMP1]]6
+// CHECK: ret %struct.uint16x8x3_t [[TMP16]]
uint16x8x3_t test_vld3q_lane_u16(uint16_t *a, uint16x8x3_t b) {
  return vld3q_lane_u16(a, b, 7);
}
@@ -3162,24 +3162,24 @@ uint16x8x3_t test_vld3q_lane_u16(uint16_t *a, uint16x8x3_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i8(<4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i8(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
-// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP1]]3
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.uint32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.uint32x4x3_t [[TMP1]]6
+// CHECK: ret %struct.uint32x4x3_t [[TMP16]]
uint32x4x3_t test_vld3q_lane_u32(uint32_t *a, uint32x4x3_t b) {
  return vld3q_lane_u32(a, b, 3);
}
@@ -3201,24 +3201,24 @@ uint32x4x3_t test_vld3q_lane_u32(uint32_t *a, uint32x4x3_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i8(<2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, i64 1, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i8(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP1]]3
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.uint64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.uint64x2x3_t [[TMP1]]6
+// CHECK: ret %struct.uint64x2x3_t [[TMP16]]
uint64x2x3_t test_vld3q_lane_u64(uint64_t *a, uint64x2x3_t b) {
  return vld3q_lane_u64(a, b, 1);
}
@@ -3240,24 +3240,24 @@ uint64x2x3_t test_vld3q_lane_u64(uint64_t *a, uint64x2x3_t b) {
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, i64 7, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]3
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.int16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.int16x8x3_t, %struct.int16x8x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.int16x8x3_t [[TMP1]]6
+// CHECK: ret %struct.int16x8x3_t [[TMP16]]
int16x8x3_t test_vld3q_lane_s16(int16_t *a, int16x8x3_t b) {
  return vld3q_lane_s16(a, b, 7);
}
@@ -3279,24 +3279,24 @@ int16x8x3_t test_vld3q_lane_s16(int16_t *a, int16x8x3_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i8(<4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i8(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32> }*
-// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP1]]3
+// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3_LANE]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.int32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.int32x4x3_t, %struct.int32x4x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.int32x4x3_t [[TMP1]]6
+// CHECK: ret %struct.int32x4x3_t [[TMP16]]
int32x4x3_t test_vld3q_lane_s32(int32_t *a, int32x4x3_t b) {
  return vld3q_lane_s32(a, b, 3);
}
@@ -3318,24 +3318,24 @@ int32x4x3_t test_vld3q_lane_s32(int32_t *a, int32x4x3_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i8(<2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, i64 1, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i8(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP1]]3
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.int64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.int64x2x3_t, %struct.int64x2x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.int64x2x3_t [[TMP1]]6
+// CHECK: ret %struct.int64x2x3_t [[TMP16]]
int64x2x3_t test_vld3q_lane_s64(int64_t *a, int64x2x3_t b) {
  return vld3q_lane_s64(a, b, 1);
}
@@ -3357,24 +3357,24 @@ int64x2x3_t test_vld3q_lane_s64(int64_t *a, int64x2x3_t b) {
// CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, i64 7, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]3
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.float16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.float16x8x3_t, %struct.float16x8x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.float16x8x3_t [[TMP1]]6
+// CHECK: ret %struct.float16x8x3_t [[TMP16]]
float16x8x3_t test_vld3q_lane_f16(float16_t *a, float16x8x3_t b) {
  return vld3q_lane_f16(a, b, 7);
}
@@ -3396,24 +3396,24 @@ float16x8x3_t test_vld3q_lane_f16(float16_t *a, float16x8x3_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0i8(<4 x float> [[TMP1]]0, <4 x float> [[TMP1]]1, <4 x float> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0i8(<4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x float>, <4 x float>, <4 x float> }*
-// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP1]]3
+// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3_LANE]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.float32x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.float32x4x3_t, %struct.float32x4x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.float32x4x3_t [[TMP1]]6
+// CHECK: ret %struct.float32x4x3_t [[TMP16]]
float32x4x3_t test_vld3q_lane_f32(float32_t *a, float32x4x3_t b) {
  return vld3q_lane_f32(a, b, 3);
}
@@ -3435,24 +3435,24 @@ float32x4x3_t test_vld3q_lane_f32(float32_t *a, float32x4x3_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <2 x double> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x double>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0i8(<2 x double> [[TMP1]]0, <2 x double> [[TMP1]]1, <2 x double> [[TMP1]]2, i64 1, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0i8(<2 x double> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x double>, <2 x double>, <2 x double> }*
-// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP1]]3
+// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3_LANE]], { <2 x double>, <2 x double>, <2 x double> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.float64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.float64x2x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.float64x2x3_t, %struct.float64x2x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.float64x2x3_t [[TMP1]]6
+// CHECK: ret %struct.float64x2x3_t [[TMP16]]
float64x2x3_t test_vld3q_lane_f64(float64_t *a, float64x2x3_t b) {
  return vld3q_lane_f64(a, b, 1);
}
@@ -3472,11 +3472,11 @@ float64x2x3_t test_vld3q_lane_f64(float64_t *a, float64x2x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %a)
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8> }*
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP6]]
@@ -3506,24 +3506,24 @@ poly8x16x3_t test_vld3q_lane_p8(poly8_t *a, poly8x16x3_t b) {
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, i64 7, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i8(<8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i64 7, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]3
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3_LANE]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.poly16x8x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x3_t [[TMP1]]6
+// CHECK: ret %struct.poly16x8x3_t [[TMP16]]
poly16x8x3_t test_vld3q_lane_p16(poly16_t *a, poly16x8x3_t b) {
  return vld3q_lane_p16(a, b, 7);
}
@@ -3545,24 +3545,24 @@ poly16x8x3_t test_vld3q_lane_p16(poly16_t *a, poly16x8x3_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i8(<2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, i64 1, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i8(<2 x i64> [[TMP10]], <2 x i64> [[TMP11]], <2 x i64> [[TMP12]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP1]]3
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3_LANE]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 48, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 48, i32 16, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x3_t [[TMP1]]6
+// CHECK: ret %struct.poly64x2x3_t [[TMP16]]
poly64x2x3_t test_vld3q_lane_p64(poly64_t *a, poly64x2x3_t b) {
  return vld3q_lane_p64(a, b, 1);
}
@@ -3582,11 +3582,11 @@ poly64x2x3_t test_vld3q_lane_p64(poly64_t *a, poly64x2x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a)
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]]
@@ -3616,24 +3616,24 @@ uint8x8x3_t test_vld3_lane_u8(uint8_t *a, uint8x8x3_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]3
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.uint16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x3_t [[TMP1]]6
+// CHECK: ret %struct.uint16x4x3_t [[TMP16]]
uint16x4x3_t test_vld3_lane_u16(uint16_t *a, uint16x4x3_t b) {
  return vld3_lane_u16(a, b, 3);
}
@@ -3655,24 +3655,24 @@ uint16x4x3_t test_vld3_lane_u16(uint16_t *a, uint16x4x3_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i8(<2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, i64 1, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i8(<2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP1]]3
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.uint32x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x3_t [[TMP1]]6
+// CHECK: ret %struct.uint32x2x3_t [[TMP16]]
uint32x2x3_t test_vld3_lane_u32(uint32_t *a, uint32x2x3_t b) {
  return vld3_lane_u32(a, b, 1);
}
@@ -3694,24 +3694,24 @@ uint32x2x3_t test_vld3_lane_u32(uint32_t *a, uint32x2x3_t b) {
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i8(<1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, i64 0, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i8(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP1]]3
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.uint64x1x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint64x1x3_t [[TMP1]]6
+// CHECK: ret %struct.uint64x1x3_t [[TMP16]]
uint64x1x3_t test_vld3_lane_u64(uint64_t *a, uint64x1x3_t b) {
  return vld3_lane_u64(a, b, 0);
}
@@ -3731,11 +3731,11 @@ uint64x1x3_t test_vld3_lane_u64(uint64_t *a, uint64x1x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a)
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]]
@@ -3765,24 +3765,24 @@ int8x8x3_t test_vld3_lane_s8(int8_t *a, int8x8x3_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]3
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.int16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.int16x4x3_t, %struct.int16x4x3_t* [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x3_t [[TMP1]]6
+// CHECK: ret %struct.int16x4x3_t [[TMP16]]
int16x4x3_t test_vld3_lane_s16(int16_t *a, int16x4x3_t b) {
  return vld3_lane_s16(a, b, 3);
}
@@ -3804,24 +3804,24 @@ int16x4x3_t test_vld3_lane_s16(int16_t *a, int16x4x3_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i8(<2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, i64 1, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i8(<2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32> }*
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP1]]3
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.int32x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.int32x2x3_t, %struct.int32x2x3_t* [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x3_t [[TMP1]]6
+// CHECK: ret %struct.int32x2x3_t [[TMP16]]
int32x2x3_t test_vld3_lane_s32(int32_t *a, int32x2x3_t b) {
  return vld3_lane_s32(a, b, 1);
}
@@ -3843,24 +3843,24 @@ int32x2x3_t test_vld3_lane_s32(int32_t *a, int32x2x3_t b) {
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i8(<1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, i64 0, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i8(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP1]]3
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.int64x1x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.int64x1x3_t, %struct.int64x1x3_t* [[RETVAL]], align 8
-// CHECK: ret %struct.int64x1x3_t [[TMP1]]6
+// CHECK: ret %struct.int64x1x3_t [[TMP16]]
int64x1x3_t test_vld3_lane_s64(int64_t *a, int64x1x3_t b) {
  return vld3_lane_s64(a, b, 0);
}
@@ -3882,24 +3882,24 @@ int64x1x3_t test_vld3_lane_s64(int64_t *a, int64x1x3_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]3
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.float16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.float16x4x3_t, %struct.float16x4x3_t* [[RETVAL]], align 8
-// CHECK: ret %struct.float16x4x3_t [[TMP1]]6
+// CHECK: ret %struct.float16x4x3_t [[TMP16]]
float16x4x3_t test_vld3_lane_f16(float16_t *a, float16x4x3_t b) {
  return vld3_lane_f16(a, b, 3);
}
@@ -3921,24 +3921,24 @@ float16x4x3_t test_vld3_lane_f16(float16_t *a, float16x4x3_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
-// CHECK: [[VLD3_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0i8(<2 x float> [[TMP1]]0, <2 x float> [[TMP1]]1, <2 x float> [[TMP1]]2, i64 1, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0i8(<2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <2 x float>, <2 x float>, <2 x float> }*
-// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP1]]3
+// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.float32x2x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.float32x2x3_t, %struct.float32x2x3_t* [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x3_t [[TMP1]]6
+// CHECK: ret %struct.float32x2x3_t [[TMP16]]
float32x2x3_t test_vld3_lane_f32(float32_t *a, float32x2x3_t b) {
  return vld3_lane_f32(a, b, 1);
}
@@ -3960,24 +3960,24 @@ float32x2x3_t test_vld3_lane_f32(float32_t *a, float32x2x3_t b) {
// CHECK: [[TMP4:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x double> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x double>
-// CHECK: [[VLD3_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0i8(<1 x double> [[TMP1]]0, <1 x double> [[TMP1]]1, <1 x double> [[TMP1]]2, i64 0, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0i8(<1 x double> [[TMP10]], <1 x double> [[TMP11]], <1 x double> [[TMP12]], i64 0, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <1 x double>, <1 x double>, <1 x double> }*
-// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP1]]3
+// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3_LANE]], { <1 x double>, <1 x double>, <1 x double> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.float64x1x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.float64x1x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.float64x1x3_t, %struct.float64x1x3_t* [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x3_t [[TMP1]]6
+// CHECK: ret %struct.float64x1x3_t [[TMP16]]
float64x1x3_t test_vld3_lane_f64(float64_t *a, float64x1x3_t b) {
  return vld3_lane_f64(a, b, 0);
}
@@ -3997,11 +3997,11 @@ float64x1x3_t test_vld3_lane_f64(float64_t *a, float64x1x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a)
// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP6]]
@@ -4031,24 +4031,24 @@ poly8x8x3_t test_vld3_lane_p8(poly8_t *a, poly8x8x3_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, i64 3, i8* [[TMP3]])
+// CHECK: [[VLD3_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i8(<4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]3
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP13]]
// CHECK: [[TMP14:%.*]] = bitcast %struct.poly16x4x3_t* [[RETVAL]] to i8*
// CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false)
// CHECK: [[TMP16:%.*]] = load %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x3_t [[TMP1]]6
+// CHECK: ret %struct.poly16x4x3_t [[TMP16]]
poly16x4x3_t test_vld3_lane_p16(poly16_t *a, poly16x4x3_t b) {
  return vld3_lane_p16(a, b, 3);
}
@@ -4070,24 +4070,24 @@ poly16x4x3_t test_vld3_lane_p16(poly16_t *a, poly16x4x3_t b) {
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr
inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64> -// CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i8(<1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, i64 0, i8* [[TMP3]]) +// CHECK: [[VLD3_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i8(<1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i64 0, i8* [[TMP3]]) // CHECK: [[TMP13:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP1]]3 +// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_LANE]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP13]] // CHECK: [[TMP14:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8* // CHECK: [[TMP15:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]4, i8* [[TMP1]]5, i64 24, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP14]], i8* [[TMP15]], i64 24, i32 8, i1 false) // CHECK: [[TMP16:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly64x1x3_t [[TMP1]]6 +// CHECK: ret %struct.poly64x1x3_t [[TMP16]] poly64x1x3_t test_vld3_lane_p64(poly64_t *a, poly64x1x3_t b) { return vld3_lane_p64(a, b, 0); } @@ -4107,14 +4107,14 @@ poly64x1x3_t test_vld3_lane_p64(poly64_t *a, poly64x1x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// 
CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 // CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, i8* %a) // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] @@ -4122,7 +4122,7 @@ poly64x1x3_t test_vld3_lane_p64(poly64_t *a, poly64x1x3_t b) { // CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint8x16x4_t [[TMP1]]0 +// CHECK: ret %struct.uint8x16x4_t [[TMP10]] uint8x16x4_t test_vld4q_lane_u8(uint8_t *a, uint8x16x4_t b) { return vld4q_lane_u8(a, b, 15); } @@ -4144,29 +4144,29 @@ uint8x16x4_t test_vld4q_lane_u8(uint8_t *a, uint8x16x4_t b) { // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16 -// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP1]]0 to <16 x i8> +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], 
align 16 +// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> -// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <8 x i16> -// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, <8 x i16> [[TMP1]]5, i64 7, i8* [[TMP3]]) +// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> +// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, i8* [[TMP3]]) // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]6 +// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint16x8x4_t [[TMP1]]9 +// CHECK: ret %struct.uint16x8x4_t [[TMP19]] uint16x8x4_t test_vld4q_lane_u16(uint16_t *a, uint16x8x4_t b) { return vld4q_lane_u16(a, b, 7); } @@ -4188,29 +4188,29 @@ uint16x8x4_t test_vld4q_lane_u16(uint16_t *a, uint16x8x4_t b) { // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16 -// CHECK: [[TMP11:%.*]] = bitcast 
<4 x i32> [[TMP1]]0 to <16 x i8> +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 +// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> -// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <4 x i32> -// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i8(<4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, <4 x i32> [[TMP1]]5, i64 3, i8* [[TMP3]]) +// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32> +// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i8(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i64 3, i8* [[TMP3]]) // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP1]]6 +// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint32x4x4_t [[TMP1]]9 +// CHECK: ret %struct.uint32x4x4_t [[TMP19]] uint32x4x4_t test_vld4q_lane_u32(uint32_t *a, uint32x4x4_t b) { return vld4q_lane_u32(a, b, 3); } @@ -4232,29 +4232,29 @@ uint32x4x4_t test_vld4q_lane_u32(uint32_t *a, uint32x4x4_t b) { // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: 
[[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16 -// CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP1]]0 to <16 x i8> +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 +// CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> -// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <2 x i64> -// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i8(<2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, <2 x i64> [[TMP1]]5, i64 1, i8* [[TMP3]]) +// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64> +// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i8(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, i8* [[TMP3]]) // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* -// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP1]]6 +// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.uint64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint64x2x4_t [[TMP1]]9 +// CHECK: ret %struct.uint64x2x4_t [[TMP19]] uint64x2x4_t test_vld4q_lane_u64(uint64_t *a, uint64x2x4_t b) { return vld4q_lane_u64(a, b, 1); } @@ -4274,14 +4274,14 @@ uint64x2x4_t test_vld4q_lane_u64(uint64_t *a, uint64x2x4_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // 
CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 // CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, i8* %a) // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] @@ -4289,7 +4289,7 @@ uint64x2x4_t test_vld4q_lane_u64(uint64_t *a, uint64x2x4_t b) { // CHECK: [[TMP9:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.int8x16x4_t, %struct.int8x16x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int8x16x4_t [[TMP1]]0 +// CHECK: ret %struct.int8x16x4_t [[TMP10]] int8x16x4_t test_vld4q_lane_s8(int8_t *a, int8x16x4_t b) { return vld4q_lane_s8(a, b, 15); } @@ -4311,29 +4311,29 @@ int8x16x4_t test_vld4q_lane_s8(int8_t *a, int8x16x4_t b) { // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16 -// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP1]]0 to <16 x i8> +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16 +// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x 
i16> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> -// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <8 x i16> -// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, <8 x i16> [[TMP1]]5, i64 7, i8* [[TMP3]]) +// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> +// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, i8* [[TMP3]]) // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]6 +// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int16x8x4_t, %struct.int16x8x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int16x8x4_t [[TMP1]]9 +// CHECK: ret %struct.int16x8x4_t [[TMP19]] int16x8x4_t test_vld4q_lane_s16(int16_t *a, int16x8x4_t b) { return vld4q_lane_s16(a, b, 7); } @@ -4355,29 +4355,29 @@ int16x8x4_t test_vld4q_lane_s16(int16_t *a, int16x8x4_t b) { // CHECK: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16 -// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP1]]0 to <16 x i8> +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16 
+// CHECK: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32> -// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <4 x i32> -// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i8(<4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, <4 x i32> [[TMP1]]5, i64 3, i8* [[TMP3]]) +// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32> +// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i8(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i64 3, i8* [[TMP3]]) // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP1]]6 +// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4_LANE]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int32x4x4_t, %struct.int32x4x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int32x4x4_t [[TMP1]]9 +// CHECK: ret %struct.int32x4x4_t [[TMP19]] int32x4x4_t test_vld4q_lane_s32(int32_t *a, int32x4x4_t b) { return vld4q_lane_s32(a, b, 3); } @@ -4399,29 +4399,29 @@ int32x4x4_t test_vld4q_lane_s32(int32_t *a, int32x4x4_t b) { // CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16 -// CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP1]]0 to <16 
x i8> +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16 +// CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> -// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <2 x i64> -// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i8(<2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, <2 x i64> [[TMP1]]5, i64 1, i8* [[TMP3]]) +// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64> +// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i8(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, i8* [[TMP3]]) // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* -// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP1]]6 +// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.int64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.int64x2x4_t, %struct.int64x2x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int64x2x4_t [[TMP1]]9 +// CHECK: ret %struct.int64x2x4_t [[TMP19]] int64x2x4_t test_vld4q_lane_s64(int64_t *a, int64x2x4_t b) { return vld4q_lane_s64(a, b, 1); } @@ -4443,29 +4443,29 @@ int64x2x4_t test_vld4q_lane_s64(int64_t *a, int64x2x4_t b) { // CHECK: [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: 
[[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]6, align 16 -// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP1]]0 to <16 x i8> +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16 +// CHECK: [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16> -// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <8 x i16> -// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, <8 x i16> [[TMP1]]5, i64 7, i8* [[TMP3]]) +// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16> +// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, i8* [[TMP3]]) // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]6 +// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float16x8x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float16x8x4_t, %struct.float16x8x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float16x8x4_t [[TMP1]]9 +// CHECK: ret %struct.float16x8x4_t [[TMP19]] float16x8x4_t test_vld4q_lane_f16(float16_t *a, float16x8x4_t b) { return vld4q_lane_f16(a, b, 7); } @@ -4487,29 +4487,29 @@ float16x8x4_t test_vld4q_lane_f16(float16_t *a, float16x8x4_t b) { // CHECK: [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 
0, i64 2 +// CHECK: [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]6, align 16 -// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP1]]0 to <16 x i8> +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16 +// CHECK: [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float> -// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <4 x float> -// CHECK: [[VLD4_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0i8(<4 x float> [[TMP1]]2, <4 x float> [[TMP1]]3, <4 x float> [[TMP1]]4, <4 x float> [[TMP1]]5, i64 3, i8* [[TMP3]]) +// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float> +// CHECK: [[VLD4_LANE:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0i8(<4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i64 3, i8* [[TMP3]]) // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP1]]6 +// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4_LANE]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float32x4x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float32x4x4_t, %struct.float32x4x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float32x4x4_t [[TMP1]]9 +// CHECK: ret %struct.float32x4x4_t [[TMP19]] float32x4x4_t test_vld4q_lane_f32(float32_t *a, float32x4x4_t b) { return vld4q_lane_f32(a, b, 3); } @@ -4531,29 +4531,29 @@ float32x4x4_t test_vld4q_lane_f32(float32_t *a, float32x4x4_t b) { // CHECK: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <2 x double> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <2 x double> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = 
getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP8:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP9:%.*]] = bitcast <2 x double> [[TMP8]] to <16 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]6, align 16 -// CHECK: [[TMP11:%.*]] = bitcast <2 x double> [[TMP1]]0 to <16 x i8> +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP10:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16 +// CHECK: [[TMP11:%.*]] = bitcast <2 x double> [[TMP10]] to <16 x i8> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x double> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x double> // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x double> -// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <2 x double> -// CHECK: [[VLD4_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0i8(<2 x double> [[TMP1]]2, <2 x double> [[TMP1]]3, <2 x double> [[TMP1]]4, <2 x double> [[TMP1]]5, i64 1, i8* [[TMP3]]) +// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x double> +// CHECK: [[VLD4_LANE:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0i8(<2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], <2 x double> [[TMP15]], i64 1, i8* [[TMP3]]) // CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* -// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP1]]6 +// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4_LANE]], { <2 x double>, <2 x double>, <2 x double>, <2 x double> }* [[TMP16]] // CHECK: [[TMP17:%.*]] = bitcast %struct.float64x2x4_t* [[RETVAL]] to i8* // CHECK: [[TMP18:%.*]] = bitcast %struct.float64x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false) // CHECK: [[TMP19:%.*]] = load %struct.float64x2x4_t, %struct.float64x2x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float64x2x4_t [[TMP1]]9 +// CHECK: ret %struct.float64x2x4_t [[TMP19]] float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) { return vld4q_lane_f64(a, b, 1); } @@ -4573,14 +4573,14 @@ float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16 // CHECK: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i64 15, i8* %a) // CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP7]] @@ -4588,7 +4588,7 @@ float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) { // CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 64, i32 16, i1 false) // CHECK: [[TMP10:%.*]] = load %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly8x16x4_t [[TMP1]]0 +// CHECK: ret %struct.poly8x16x4_t [[TMP10]] poly8x16x4_t test_vld4q_lane_p8(poly8_t *a, poly8x16x4_t b) { return vld4q_lane_p8(a, b, 15); } @@ -4610,29 +4610,29 @@ poly8x16x4_t test_vld4q_lane_p8(poly8_t *a, poly8x16x4_t b) { // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: 
[[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
-// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP1]]0 to <16 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <8 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, <8 x i16> [[TMP1]]5, i64 7, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i8(<8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i64 7, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }*
-// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP1]]6
+// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4_LANE]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.poly16x8x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[RETVAL]], align 16
-// CHECK: ret %struct.poly16x8x4_t [[TMP1]]9
+// CHECK: ret %struct.poly16x8x4_t [[TMP19]]
poly16x8x4_t test_vld4q_lane_p16(poly16_t *a, poly16x8x4_t b) {
  return vld4q_lane_p16(a, b, 7);
}
@@ -4654,29 +4654,29 @@ poly16x8x4_t test_vld4q_lane_p16(poly16_t *a, poly16x8x4_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP8]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP1]]0 to <16 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
+// CHECK: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP10]] to <16 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP1]]1 to <2 x i64>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i8(<2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, <2 x i64> [[TMP1]]5, i64 1, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <2 x i64>
+// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i8(<2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <2 x i64> [[TMP15]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
-// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP1]]6
+// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4_LANE]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 64, i32 16, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 64, i32 16, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
-// CHECK: ret %struct.poly64x2x4_t [[TMP1]]9
+// CHECK: ret %struct.poly64x2x4_t [[TMP19]]
poly64x2x4_t test_vld4q_lane_p64(poly64_t *a, poly64x2x4_t b) {
  return vld4q_lane_p64(a, b, 1);
}
@@ -4696,14 +4696,14 @@ poly64x2x4_t test_vld4q_lane_p64(poly64_t *a, poly64x2x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, i8* %a)
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
@@ -4711,7 +4711,7 @@ poly64x2x4_t test_vld4q_lane_p64(poly64_t *a, poly64x2x4_t b) {
// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false)
// CHECK: [[TMP10:%.*]] = load %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint8x8x4_t [[TMP1]]0
+// CHECK: ret %struct.uint8x8x4_t [[TMP10]]
uint8x8x4_t test_vld4_lane_u8(uint8_t *a, uint8x8x4_t b) {
  return vld4_lane_u8(a, b, 7);
}
@@ -4733,29 +4733,29 @@ uint8x8x4_t test_vld4_lane_u8(uint8_t *a, uint8x8x4_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <4 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, <4 x i16> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]6
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.uint16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint16x4x4_t [[TMP1]]9
+// CHECK: ret %struct.uint16x4x4_t [[TMP19]]
uint16x4x4_t test_vld4_lane_u16(uint16_t *a, uint16x4x4_t b) {
  return vld4_lane_u16(a, b, 3);
}
@@ -4777,29 +4777,29 @@ uint16x4x4_t test_vld4_lane_u16(uint16_t *a, uint16x4x4_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <2 x i32>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i8(<2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, <2 x i32> [[TMP1]]5, i64 1, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
+// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i8(<2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP1]]6
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.uint32x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint32x2x4_t [[TMP1]]9
+// CHECK: ret %struct.uint32x2x4_t [[TMP19]]
uint32x2x4_t test_vld4_lane_u32(uint32_t *a, uint32x2x4_t b) {
  return vld4_lane_u32(a, b, 1);
}
@@ -4821,29 +4821,29 @@ uint32x2x4_t test_vld4_lane_u32(uint32_t *a, uint32x2x4_t b) {
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <1 x i64>
-// CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i8(<1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, <1 x i64> [[TMP1]]5, i64 0, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
+// CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i8(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP1]]6
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.uint64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.uint64x1x4_t [[TMP1]]9
+// CHECK: ret %struct.uint64x1x4_t [[TMP19]]
uint64x1x4_t test_vld4_lane_u64(uint64_t *a, uint64x1x4_t b) {
  return vld4_lane_u64(a, b, 0);
}
@@ -4863,14 +4863,14 @@ uint64x1x4_t test_vld4_lane_u64(uint64_t *a, uint64x1x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, i8* %a)
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
@@ -4878,7 +4878,7 @@ uint64x1x4_t test_vld4_lane_u64(uint64_t *a, uint64x1x4_t b) {
// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false)
// CHECK: [[TMP10:%.*]] = load %struct.int8x8x4_t, %struct.int8x8x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.int8x8x4_t [[TMP1]]0
+// CHECK: ret %struct.int8x8x4_t [[TMP10]]
int8x8x4_t test_vld4_lane_s8(int8_t *a, int8x8x4_t b) {
  return vld4_lane_s8(a, b, 7);
}
@@ -4900,29 +4900,29 @@ int8x8x4_t test_vld4_lane_s8(int8_t *a, int8x8x4_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <4 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, <4 x i16> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]6
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.int16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.int16x4x4_t, %struct.int16x4x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.int16x4x4_t [[TMP1]]9
+// CHECK: ret %struct.int16x4x4_t [[TMP19]]
int16x4x4_t test_vld4_lane_s16(int16_t *a, int16x4x4_t b) {
  return vld4_lane_s16(a, b, 3);
}
@@ -4944,29 +4944,29 @@ int16x4x4_t test_vld4_lane_s16(int16_t *a, int16x4x4_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <2 x i32>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i8(<2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, <2 x i32> [[TMP1]]5, i64 1, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
+// CHECK: [[VLD4_LANE:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i8(<2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }*
-// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP1]]6
+// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.int32x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.int32x2x4_t, %struct.int32x2x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.int32x2x4_t [[TMP1]]9
+// CHECK: ret %struct.int32x2x4_t [[TMP19]]
int32x2x4_t test_vld4_lane_s32(int32_t *a, int32x2x4_t b) {
  return vld4_lane_s32(a, b, 1);
}
@@ -4988,29 +4988,29 @@ int32x2x4_t test_vld4_lane_s32(int32_t *a, int32x2x4_t b) {
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <1 x i64>
-// CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i8(<1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, <1 x i64> [[TMP1]]5, i64 0, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
+// CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i8(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP1]]6
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.int64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.int64x1x4_t, %struct.int64x1x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.int64x1x4_t [[TMP1]]9
+// CHECK: ret %struct.int64x1x4_t [[TMP19]]
int64x1x4_t test_vld4_lane_s64(int64_t *a, int64x1x4_t b) {
  return vld4_lane_s64(a, b, 0);
}
@@ -5032,29 +5032,29 @@ int64x1x4_t test_vld4_lane_s64(int64_t *a, int64x1x4_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <4 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, <4 x i16> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]6
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.float16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.float16x4x4_t, %struct.float16x4x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.float16x4x4_t [[TMP1]]9
+// CHECK: ret %struct.float16x4x4_t [[TMP19]]
float16x4x4_t test_vld4_lane_f16(float16_t *a, float16x4x4_t b) {
  return vld4_lane_f16(a, b, 3);
}
@@ -5076,29 +5076,29 @@ float16x4x4_t test_vld4_lane_f16(float16_t *a, float16x4x4_t b) {
// CHECK: [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <2 x float>
-// CHECK: [[VLD4_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0i8(<2 x float> [[TMP1]]2, <2 x float> [[TMP1]]3, <2 x float> [[TMP1]]4, <2 x float> [[TMP1]]5, i64 1, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
+// CHECK: [[VLD4_LANE:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0i8(<2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i64 1, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }*
-// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP1]]6
+// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.float32x2x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.float32x2x4_t, %struct.float32x2x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.float32x2x4_t [[TMP1]]9
+// CHECK: ret %struct.float32x2x4_t [[TMP19]]
float32x2x4_t test_vld4_lane_f32(float32_t *a, float32x2x4_t b) {
  return vld4_lane_f32(a, b, 1);
}
@@ -5120,29 +5120,29 @@ float32x2x4_t test_vld4_lane_f32(float32_t *a, float32x2x4_t b) {
// CHECK: [[TMP4:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x double> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x double> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x double> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <1 x double> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <1 x double> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x double>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x double>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x double>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <1 x double>
-// CHECK: [[VLD4_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0i8(<1 x double> [[TMP1]]2, <1 x double> [[TMP1]]3, <1 x double> [[TMP1]]4, <1 x double> [[TMP1]]5, i64 0, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x double>
+// CHECK: [[VLD4_LANE:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0i8(<1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], <1 x double> [[TMP15]], i64 0, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <1 x double>, <1 x double>, <1 x double>, <1 x double> }*
-// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP1]]6
+// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4_LANE]], { <1 x double>, <1 x double>, <1 x double>, <1 x double> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.float64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.float64x1x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.float64x1x4_t, %struct.float64x1x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.float64x1x4_t [[TMP1]]9
+// CHECK: ret %struct.float64x1x4_t [[TMP19]]
float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) {
  return vld4_lane_f64(a, b, 0);
}
@@ -5162,14 +5162,14 @@ float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i64 7, i8* %a)
// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }*
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]]
@@ -5177,7 +5177,7 @@ float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) {
// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP8]], i8* [[TMP9]], i64 32, i32 8, i1 false)
// CHECK: [[TMP10:%.*]] = load %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.poly8x8x4_t [[TMP1]]0
+// CHECK: ret %struct.poly8x8x4_t [[TMP10]]
poly8x8x4_t test_vld4_lane_p8(poly8_t *a, poly8x8x4_t b) {
  return vld4_lane_p8(a, b, 7);
}
@@ -5199,29 +5199,29 @@ poly8x8x4_t test_vld4_lane_p8(poly8_t *a, poly8x8x4_t b) {
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <4 x i16>
-// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, <4 x i16> [[TMP1]]5, i64 3, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
+// CHECK: [[VLD4_LANE:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i8(<4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i64 3, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }*
-// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP1]]6
+// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.poly16x4x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.poly16x4x4_t [[TMP1]]9
+// CHECK: ret %struct.poly16x4x4_t [[TMP19]]
poly16x4x4_t test_vld4_lane_p16(poly16_t *a, poly16x4x4_t b) {
  return vld4_lane_p16(a, b, 3);
}
@@ -5243,29 +5243,29 @@ poly16x4x4_t test_vld4_lane_p16(poly16_t *a, poly16x4x4_t b) {
// CHECK: [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8
-// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP1]]0 to <8 x i8>
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
+// CHECK: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
-// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP1]]1 to <1 x i64>
-// CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i8(<1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, <1 x i64> [[TMP1]]5, i64 0, i8* [[TMP3]])
+// CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
+// CHECK: [[VLD4_LANE:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i8(<1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i64 0, i8* [[TMP3]])
// CHECK: [[TMP16:%.*]] = bitcast i8* [[TMP2]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
-// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP1]]6
+// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_LANE]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP16]]
// CHECK: [[TMP17:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
// CHECK: [[TMP18:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
-// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]]7, i8* [[TMP1]]8, i64 32, i32 8, i1 false)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP17]], i8* [[TMP18]], i64 32, i32 8, i1 false)
// CHECK: [[TMP19:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
-// CHECK: ret %struct.poly64x1x4_t [[TMP1]]9
+// CHECK: ret %struct.poly64x1x4_t [[TMP19]]
poly64x1x4_t test_vld4_lane_p64(poly64_t *a, poly64x1x4_t b) {
  return vld4_lane_p64(a, b, 0);
}
@@ -5594,8 +5594,8 @@ void test_vst1_lane_p64(poly64_t *a, poly64x1_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, i8* %a)
// CHECK: ret void
void test_vst2q_lane_u8(uint8_t *a, uint8x16x2_t b) {
@@ -5616,8 +5616,8 @@ void test_vst2q_lane_u8(uint8_t *a, uint8x16x2_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
@@ -5641,8 +5641,8 @@ void test_vst2q_lane_u16(uint16_t *a, uint16x8x2_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
@@ -5666,8 +5666,8 @@ void test_vst2q_lane_u32(uint32_t *a, uint32x4x2_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
@@ -5689,8 +5689,8 @@ void test_vst2q_lane_u64(uint64_t *a, uint64x2x2_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, i8* %a)
// CHECK: ret void
void test_vst2q_lane_s8(int8_t *a, int8x16x2_t b) {
@@ -5711,8 +5711,8 @@ void test_vst2q_lane_s8(int8_t *a, int8x16x2_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
@@ -5736,8 +5736,8 @@ void test_vst2q_lane_s16(int16_t *a, int16x8x2_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
@@ -5761,8 +5761,8 @@ void test_vst2q_lane_s32(int32_t *a, int32x4x2_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
@@ -5786,8 +5786,8 @@ void test_vst2q_lane_s64(int64_t *a, int64x2x2_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
@@ -5811,8 +5811,8 @@ void test_vst2q_lane_f16(float16_t *a, float16x8x2_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
@@ -5836,8 +5836,8 @@ void test_vst2q_lane_f32(float32_t *a, float32x4x2_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], [2 x <2 x double>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
@@ -5859,8 +5859,8 @@ void test_vst2q_lane_f64(float64_t *a, float64x2x2_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 15, i8* %a)
// CHECK: ret void
void test_vst2q_lane_p8(poly8_t *a, poly8x16x2_t b) {
@@ -5881,8 +5881,8 @@ void test_vst2q_lane_p8(poly8_t *a, poly8x16x2_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
@@ -5906,8 +5906,8 @@ void test_vst2q_lane_p16(poly16_t *a, poly16x8x2_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
@@ -5929,8 +5929,8 @@ void test_vst2q_lane_p64(poly64_t *a, poly64x2x2_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, i8* %a)
// CHECK: ret void
void test_vst2_lane_u8(uint8_t *a, uint8x8x2_t b) {
@@ -5951,8 +5951,8 @@ void test_vst2_lane_u8(uint8_t *a, uint8x8x2_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
@@ -5976,8 +5976,8 @@ void test_vst2_lane_u16(uint16_t *a, uint16x4x2_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
@@ -6001,8 +6001,8 @@ void test_vst2_lane_u32(uint32_t *a, uint32x2x2_t b) {
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
@@ -6024,8 +6024,8 @@ void test_vst2_lane_u64(uint64_t *a, uint64x1x2_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, i8* %a)
// CHECK: ret void
void test_vst2_lane_s8(int8_t *a, int8x8x2_t b) {
@@ -6046,8 +6046,8 @@ void test_vst2_lane_s8(int8_t *a, int8x8x2_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
@@ -6071,8 +6071,8 @@ void test_vst2_lane_s16(int16_t *a, int16x4x2_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x
i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> @@ -6096,8 +6096,8 @@ void test_vst2_lane_s32(int32_t *a, int32x2x2_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> @@ -6121,8 +6121,8 @@ void test_vst2_lane_s64(int64_t *a, int64x1x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> @@ -6146,8 +6146,8 @@ void test_vst2_lane_f16(float16_t *a, float16x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> @@ -6171,8 +6171,8 @@ void test_vst2_lane_f32(float32_t *a, float32x2x2_t b) { // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], 
[2 x <1 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], [2 x <1 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> @@ -6194,8 +6194,8 @@ void test_vst2_lane_f64(float64_t *a, float64x1x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], i64 7, i8* %a) // CHECK: ret void void test_vst2_lane_p8(poly8_t *a, poly8x8x2_t b) { @@ -6216,8 +6216,8 @@ void test_vst2_lane_p8(poly8_t *a, poly8x8x2_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> @@ -6241,8 +6241,8 @@ void test_vst2_lane_p16(poly16_t *a, poly16x4x2_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> @@ -6264,11 +6264,11 @@ void test_vst2_lane_p64(poly64_t *a, poly64x1x2_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds 
%struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %a) // CHECK: ret void void test_vst3q_lane_u8(uint8_t *a, uint8x16x3_t b) { @@ -6289,17 +6289,17 @@ void test_vst3q_lane_u8(uint8_t *a, uint8x16x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i64 7, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_u16(uint16_t *a, uint16x8x3_t b) { vst3q_lane_u16(a, b, 7); @@ -6319,17 +6319,17 @@ void test_vst3q_lane_u16(uint16_t *a, uint16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* 
[[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> -// CHECK: call void @llvm.aarch64.neon.st3lane.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, i64 3, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i64 3, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_u32(uint32_t *a, uint32x4x3_t b) { vst3q_lane_u32(a, b, 3); @@ -6349,17 +6349,17 @@ void test_vst3q_lane_u32(uint32_t *a, uint32x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> -// CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i64 1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_u64(uint64_t *a, uint64x2x3_t b) { vst3q_lane_u64(a, b, 1); @@ -6377,11 +6377,11 @@ void test_vst3q_lane_u64(uint64_t *a, uint64x2x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x 
i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %a) // CHECK: ret void void test_vst3q_lane_s8(int8_t *a, int8x16x3_t b) { @@ -6402,17 +6402,17 @@ void test_vst3q_lane_s8(int8_t *a, int8x16x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i64 7, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_s16(int16_t *a, int16x8x3_t b) { vst3q_lane_s16(a, b, 7); @@ -6432,17 +6432,17 @@ void test_vst3q_lane_s16(int16_t *a, int16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]1, 
i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> -// CHECK: call void @llvm.aarch64.neon.st3lane.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP1]]0, <4 x i32> [[TMP1]]1, i64 3, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v4i32.p0i8(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], i64 3, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_s32(int32_t *a, int32x4x3_t b) { vst3q_lane_s32(a, b, 3); @@ -6462,17 +6462,17 @@ void test_vst3q_lane_s32(int32_t *a, int32x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> -// CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i64 1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_s64(int64_t *a, int64x2x3_t b) { vst3q_lane_s64(a, b, 1); @@ -6492,17 +6492,17 @@ void test_vst3q_lane_s64(int64_t *a, int64x2x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16 // CHECK: 
[[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i64 7, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_f16(float16_t *a, float16x8x3_t b) { vst3q_lane_f16(a, b, 7); @@ -6522,17 +6522,17 @@ void test_vst3q_lane_f16(float16_t *a, float16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> -// CHECK: call void @llvm.aarch64.neon.st3lane.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP1]]0, <4 x float> [[TMP1]]1, i64 3, i8* [[TMP2]]) +// CHECK: call void 
@llvm.aarch64.neon.st3lane.v4f32.p0i8(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], i64 3, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_f32(float32_t *a, float32x4x3_t b) { vst3q_lane_f32(a, b, 3); @@ -6552,17 +6552,17 @@ void test_vst3q_lane_f32(float32_t *a, float32x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], [3 x <2 x double>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> -// CHECK: call void @llvm.aarch64.neon.st3lane.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP1]]0, <2 x double> [[TMP1]]1, i64 1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v2f64.p0i8(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], i64 1, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_f64(float64_t *a, float64x2x3_t b) { vst3q_lane_f64(a, b, 1); @@ -6580,11 +6580,11 @@ void test_vst3q_lane_f64(float64_t *a, float64x2x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16 // CHECK: call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> 
[[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 15, i8* %a) // CHECK: ret void void test_vst3q_lane_p8(poly8_t *a, poly8x16x3_t b) { @@ -6605,17 +6605,17 @@ void test_vst3q_lane_p8(poly8_t *a, poly8x16x3_t b) { // CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP1]]0, <8 x i16> [[TMP1]]1, i64 7, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v8i16.p0i8(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], i64 7, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_p16(poly16_t *a, poly16x8x3_t b) { vst3q_lane_p16(a, b, 7); @@ -6635,17 +6635,17 @@ void test_vst3q_lane_p16(poly16_t *a, poly16x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> // CHECK: [[TMP11:%.*]] = bitcast <16 x 
i8> [[TMP8]] to <2 x i64> -// CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i64 1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i64 1, i8* [[TMP2]]) // CHECK: ret void void test_vst3q_lane_p64(poly64_t *a, poly64x2x3_t b) { vst3q_lane_p64(a, b, 1); @@ -6663,11 +6663,11 @@ void test_vst3q_lane_p64(poly64_t *a, poly64x2x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 // CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a) // CHECK: ret void void test_vst3_lane_u8(uint8_t *a, uint8x8x3_t b) { @@ -6688,17 +6688,17 @@ void test_vst3_lane_u8(uint8_t *a, uint8x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i64 3, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> 
[[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, i8* [[TMP2]]) // CHECK: ret void void test_vst3_lane_u16(uint16_t *a, uint16x4x3_t b) { vst3_lane_u16(a, b, 3); @@ -6718,17 +6718,17 @@ void test_vst3_lane_u16(uint16_t *a, uint16x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> -// CHECK: call void @llvm.aarch64.neon.st3lane.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, i64 1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i64 1, i8* [[TMP2]]) // CHECK: ret void void test_vst3_lane_u32(uint32_t *a, uint32x2x3_t b) { vst3_lane_u32(a, b, 1); @@ -6748,17 +6748,17 @@ void test_vst3_lane_u32(uint32_t *a, uint32x2x3_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> // CHECK: [[TMP11:%.*]] = 
bitcast <8 x i8> [[TMP8]] to <1 x i64> -// CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i64 0, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, i8* [[TMP2]]) // CHECK: ret void void test_vst3_lane_u64(uint64_t *a, uint64x1x3_t b) { vst3_lane_u64(a, b, 0); @@ -6776,11 +6776,11 @@ void test_vst3_lane_u64(uint64_t *a, uint64x1x3_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 // CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a) // CHECK: ret void void test_vst3_lane_s8(int8_t *a, int8x8x3_t b) { @@ -6801,17 +6801,17 @@ void test_vst3_lane_s8(int8_t *a, int8x8x3_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i64 3, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> 
[[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, i8* [[TMP2]]) // CHECK: ret void void test_vst3_lane_s16(int16_t *a, int16x4x3_t b) { vst3_lane_s16(a, b, 3); @@ -6831,17 +6831,17 @@ void test_vst3_lane_s16(int16_t *a, int16x4x3_t b) { // CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> -// CHECK: call void @llvm.aarch64.neon.st3lane.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP1]]0, <2 x i32> [[TMP1]]1, i64 1, i8* [[TMP2]]) +// CHECK: call void @llvm.aarch64.neon.st3lane.v2i32.p0i8(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], i64 1, i8* [[TMP2]]) // CHECK: ret void void test_vst3_lane_s32(int32_t *a, int32x2x3_t b) { vst3_lane_s32(a, b, 1); @@ -6861,17 +6861,17 @@ void test_vst3_lane_s32(int32_t *a, int32x2x3_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> 
[[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i64 0, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, i8* [[TMP2]])
// CHECK: ret void
void test_vst3_lane_s64(int64_t *a, int64x1x3_t b) {
  vst3_lane_s64(a, b, 0);
@@ -6891,17 +6891,17 @@ void test_vst3_lane_s64(int64_t *a, int64x1x3_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i64 3, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, i8* [[TMP2]])
// CHECK: ret void
void test_vst3_lane_f16(float16_t *a, float16x4x3_t b) {
  vst3_lane_f16(a, b, 3);
@@ -6921,17 +6921,17 @@ void test_vst3_lane_f16(float16_t *a, float16x4x3_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP1]]0, <2 x float> [[TMP1]]1, i64 1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3lane.v2f32.p0i8(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], i64 1, i8* [[TMP2]])
// CHECK: ret void
void test_vst3_lane_f32(float32_t *a, float32x2x3_t b) {
  vst3_lane_f32(a, b, 1);
@@ -6951,17 +6951,17 @@ void test_vst3_lane_f32(float32_t *a, float32x2x3_t b) {
// CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], [3 x <1 x double>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP1]]0, <1 x double> [[TMP1]]1, i64 0, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3lane.v1f64.p0i8(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], i64 0, i8* [[TMP2]])
// CHECK: ret void
void test_vst3_lane_f64(float64_t *a, float64x1x3_t b) {
  vst3_lane_f64(a, b, 0);
@@ -6979,11 +6979,11 @@ void test_vst3_lane_f64(float64_t *a, float64x1x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i64 7, i8* %a)
// CHECK: ret void
void test_vst3_lane_p8(poly8_t *a, poly8x8x3_t b) {
@@ -7004,17 +7004,17 @@ void test_vst3_lane_p8(poly8_t *a, poly8x8x3_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP1]]0, <4 x i16> [[TMP1]]1, i64 3, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3lane.v4i16.p0i8(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], i64 3, i8* [[TMP2]])
// CHECK: ret void
void test_vst3_lane_p16(poly16_t *a, poly16x4x3_t b) {
  vst3_lane_p16(a, b, 3);
@@ -7034,17 +7034,17 @@ void test_vst3_lane_p16(poly16_t *a, poly16x4x3_t b) {
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i64 0, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3lane.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i64 0, i8* [[TMP2]])
// CHECK: ret void
void test_vst3_lane_p64(poly64_t *a, poly64x1x3_t b) {
  vst3_lane_p64(a, b, 0);
@@ -7062,14 +7062,14 @@ void test_vst3_lane_p64(poly64_t *a, poly64x1x3_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %a)
// CHECK: ret void
void test_vst4q_lane_u8(uint8_t *a, uint8x16x4_t b) {
@@ -7090,22 +7090,22 @@ void test_vst4q_lane_u8(uint8_t *a, uint8x16x4_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i64 7, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_u16(uint16_t *a, uint16x8x4_t b) {
  vst4q_lane_u16(a, b, 7);
@@ -7125,22 +7125,22 @@ void test_vst4q_lane_u16(uint16_t *a, uint16x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4i32.p0i8(<4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, i64 3, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i64 3, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_u32(uint32_t *a, uint32x4x4_t b) {
  vst4q_lane_u32(a, b, 3);
@@ -7160,22 +7160,22 @@ void test_vst4q_lane_u32(uint32_t *a, uint32x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0i8(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i64 1, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_u64(uint64_t *a, uint64x2x4_t b) {
  vst4q_lane_u64(a, b, 1);
@@ -7193,14 +7193,14 @@ void test_vst4q_lane_u64(uint64_t *a, uint64x2x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %a)
// CHECK: ret void
void test_vst4q_lane_s8(int8_t *a, int8x16x4_t b) {
@@ -7221,22 +7221,22 @@ void test_vst4q_lane_s8(int8_t *a, int8x16x4_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i64 7, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_s16(int16_t *a, int16x8x4_t b) {
  vst4q_lane_s16(a, b, 7);
@@ -7256,22 +7256,22 @@ void test_vst4q_lane_s16(int16_t *a, int16x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4i32.p0i8(<4 x i32> [[TMP1]]1, <4 x i32> [[TMP1]]2, <4 x i32> [[TMP1]]3, <4 x i32> [[TMP1]]4, i64 3, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v4i32.p0i8(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], i64 3, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_s32(int32_t *a, int32x4x4_t b) {
  vst4q_lane_s32(a, b, 3);
@@ -7291,22 +7291,22 @@ void test_vst4q_lane_s32(int32_t *a, int32x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0i8(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i64 1, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_s64(int64_t *a, int64x2x4_t b) {
  vst4q_lane_s64(a, b, 1);
@@ -7326,22 +7326,22 @@ void test_vst4q_lane_s64(int64_t *a, int64x2x4_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i64 7, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_f16(float16_t *a, float16x8x4_t b) {
  vst4q_lane_f16(a, b, 7);
@@ -7361,22 +7361,22 @@ void test_vst4q_lane_f16(float16_t *a, float16x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <4 x float>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4f32.p0i8(<4 x float> [[TMP1]]1, <4 x float> [[TMP1]]2, <4 x float> [[TMP1]]3, <4 x float> [[TMP1]]4, i64 3, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v4f32.p0i8(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], i64 3, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_f32(float32_t *a, float32x4x4_t b) {
  vst4q_lane_f32(a, b, 3);
@@ -7396,22 +7396,22 @@ void test_vst4q_lane_f32(float32_t *a, float32x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], [4 x <2 x double>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x double>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2f64.p0i8(<2 x double> [[TMP1]]1, <2 x double> [[TMP1]]2, <2 x double> [[TMP1]]3, <2 x double> [[TMP1]]4, i64 1, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v2f64.p0i8(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], i64 1, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_f64(float64_t *a, float64x2x4_t b) {
  vst4q_lane_f64(a, b, 1);
@@ -7429,14 +7429,14 @@ void test_vst4q_lane_f64(float64_t *a, float64x2x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i64 15, i8* %a)
// CHECK: ret void
void test_vst4q_lane_p8(poly8_t *a, poly8x16x4_t b) {
@@ -7457,22 +7457,22 @@ void test_vst4q_lane_p8(poly8_t *a, poly8x16x4_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <8 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP1]]1, <8 x i16> [[TMP1]]2, <8 x i16> [[TMP1]]3, <8 x i16> [[TMP1]]4, i64 7, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v8i16.p0i8(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], i64 7, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_lane_p16(a, b, 7);
@@ -7492,22 +7492,22 @@ void test_vst4q_lane_p16(poly16_t *a, poly16x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]2, align 16
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]4, align 16
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]6, align 16
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0i8(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i64 1, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i64 1, i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_lane_p64(poly64_t *a, poly64x2x4_t b) {
  vst4q_lane_p64(a, b, 1);
@@ -7525,14 +7525,14 @@ void test_vst4q_lane_p64(poly64_t *a, poly64x2x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a)
// CHECK: ret void
void test_vst4_lane_u8(uint8_t *a, uint8x8x4_t b) {
@@ -7553,22 +7553,22 @@ void test_vst4_lane_u8(uint8_t *a, uint8x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i64 3, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, i8* [[TMP2]])
// CHECK: ret void
void test_vst4_lane_u16(uint16_t *a, uint16x4x4_t b) {
  vst4_lane_u16(a, b, 3);
@@ -7588,22 +7588,22 @@ void test_vst4_lane_u16(uint16_t *a, uint16x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2i32.p0i8(<2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, i64 1, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i64 1, i8* [[TMP2]])
// CHECK: ret void
void test_vst4_lane_u32(uint32_t *a, uint32x2x4_t b) {
  vst4_lane_u32(a, b, 1);
@@ -7623,22 +7623,22 @@ void test_vst4_lane_u32(uint32_t *a, uint32x2x4_t b) {
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0i8(<1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i64 0, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, i8* [[TMP2]])
// CHECK: ret void
void test_vst4_lane_u64(uint64_t *a, uint64x1x4_t b) {
  vst4_lane_u64(a, b, 0);
@@ -7656,14 +7656,14 @@ void test_vst4_lane_u64(uint64_t *a, uint64x1x4_t b) {
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a)
// CHECK: ret void
void test_vst4_lane_s8(int8_t *a, int8x8x4_t b) {
@@ -7684,22 +7684,22 @@ void test_vst4_lane_s8(int8_t *a, int8x8x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i64 3, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, i8* [[TMP2]])
// CHECK: ret void
void test_vst4_lane_s16(int16_t *a, int16x4x4_t b) {
  vst4_lane_s16(a, b, 3);
@@ -7719,22 +7719,22 @@ void test_vst4_lane_s16(int16_t *a, int16x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x i32>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v2i32.p0i8(<2 x i32> [[TMP1]]1, <2 x i32> [[TMP1]]2, <2 x i32> [[TMP1]]3, <2 x i32> [[TMP1]]4, i64 1, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v2i32.p0i8(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], i64 1, i8* [[TMP2]])
// CHECK: ret void
void test_vst4_lane_s32(int32_t *a, int32x2x4_t b) {
  vst4_lane_s32(a, b, 1);
@@ -7754,22 +7754,22 @@ void test_vst4_lane_s32(int32_t *a, int32x2x4_t b) {
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0i8(<1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i64 0, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, i8* [[TMP2]])
// CHECK: ret void
void test_vst4_lane_s64(int64_t *a, int64x1x4_t b) {
  vst4_lane_s64(a, b, 0);
@@ -7789,22 +7789,22 @@ void test_vst4_lane_s64(int64_t *a, int64x1x4_t b) {
// CHECK: [[TMP3:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16>
-// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i64 3, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
+// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, i8* [[TMP2]])
// CHECK: ret void
void test_vst4_lane_f16(float16_t *a, float16x4x4_t b) {
  vst4_lane_f16(a, b, 3);
@@ -7824,22 +7824,22 @@ void test_vst4_lane_f16(float16_t *a, float16x4x4_t b) {
// CHECK: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]1, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]2, align 8
+// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]3, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]4, align 8
+// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]]5, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]]6, align 8
+// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8>
[[TMP4]] to <2 x float> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <2 x float> -// CHECK: call void @llvm.aarch64.neon.st4lane.v2f32.p0i8(<2 x float> [[TMP1]]1, <2 x float> [[TMP1]]2, <2 x float> [[TMP1]]3, <2 x float> [[TMP1]]4, i64 1, i8* [[TMP2]]) +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> +// CHECK: call void @llvm.aarch64.neon.st4lane.v2f32.p0i8(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], i64 1, i8* [[TMP2]]) // CHECK: ret void void test_vst4_lane_f32(float32_t *a, float32x2x4_t b) { vst4_lane_f32(a, b, 1); @@ -7859,22 +7859,22 @@ void test_vst4_lane_f32(float32_t *a, float32x2x4_t b) { // CHECK: [[TMP3:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], [4 x <1 x double>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <1 x double>, <1 x double>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x double> -// CHECK: call void @llvm.aarch64.neon.st4lane.v1f64.p0i8(<1 x double> [[TMP1]]1, <1 x double> [[TMP1]]2, <1 x double> [[TMP1]]3, <1 x double> [[TMP1]]4, i64 0, i8* [[TMP2]]) +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> +// CHECK: call void @llvm.aarch64.neon.st4lane.v1f64.p0i8(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], i64 0, i8* [[TMP2]]) // CHECK: ret void void test_vst4_lane_f64(float64_t *a, float64x1x4_t b) { vst4_lane_f64(a, b, 0); @@ -7892,14 +7892,14 @@ void 
test_vst4_lane_f64(float64_t *a, float64x1x4_t b) { // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i64 0, i64 0 // CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 // CHECK: call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i64 7, i8* %a) // CHECK: ret void void test_vst4_lane_p8(poly8_t *a, poly8x8x4_t b) { @@ -7920,22 +7920,22 @@ void test_vst4_lane_p8(poly8_t *a, poly8x8x4_t b) { // CHECK: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr 
inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <4 x i16> -// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP1]]1, <4 x i16> [[TMP1]]2, <4 x i16> [[TMP1]]3, <4 x i16> [[TMP1]]4, i64 3, i8* [[TMP2]]) +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> +// CHECK: call void @llvm.aarch64.neon.st4lane.v4i16.p0i8(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], i64 3, i8* [[TMP2]]) // CHECK: ret void void test_vst4_lane_p16(poly16_t *a, poly16x4x4_t b) { vst4_lane_p16(a, b, 3); @@ -7955,22 +7955,22 @@ void test_vst4_lane_p16(poly16_t *a, poly16x4x4_t b) { // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 1 -// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]2, align 8 +// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 1 +// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]3, i64 0, i64 2 -// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]4, align 8 +// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i64 0, i64 2 +// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0 -// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]5, i64 0, i64 3 -// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]6, align 8 +// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i64 0, i64 3 +// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> -// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x i64> -// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0i8(<1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i64 0, i8* [[TMP2]]) +// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> +// CHECK: call void @llvm.aarch64.neon.st4lane.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i64 0, i8* [[TMP2]]) // CHECK: ret void void test_vst4_lane_p64(poly64_t *a, 
poly64x1x4_t b) { vst4_lane_p64(a, b, 0); diff --git a/test/CodeGen/aarch64-neon-perm.c b/test/CodeGen/aarch64-neon-perm.c index 7a97161baa..ca9f15d22c 100644 --- a/test/CodeGen/aarch64-neon-perm.c +++ b/test/CodeGen/aarch64-neon-perm.c @@ -931,9 +931,9 @@ int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP1]]1, [2 x <4 x i16>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int16x4x2_t [[TMP1]]2 +// CHECK: ret %struct.int16x4x2_t [[TMP12]] int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { return vuzp_s16(a, b); } @@ -958,9 +958,9 @@ int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x i32>] [[TMP1]]1, [2 x <2 x i32>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int32x2x2_t [[TMP1]]2 +// CHECK: ret %struct.int32x2x2_t [[TMP12]] int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { return vuzp_s32(a, b); } @@ -1008,9 +1008,9 @@ uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP1]]1, [2 x <4 x i16>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint16x4x2_t [[TMP1]]2 +// CHECK: ret %struct.uint16x4x2_t [[TMP12]] uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { return vuzp_u16(a, b); } @@ -1035,9 +1035,9 @@ uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x i32>] [[TMP1]]1, [2 x <2 x i32>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint32x2x2_t [[TMP1]]2 +// CHECK: ret %struct.uint32x2x2_t [[TMP12]] uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { return vuzp_u32(a, b); } @@ -1062,9 +1062,9 @@ uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 // 
CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x float>] [[TMP1]]1, [2 x <2 x float>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float32x2x2_t [[TMP1]]2 +// CHECK: ret %struct.float32x2x2_t [[TMP12]] float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { return vuzp_f32(a, b); } @@ -1112,9 +1112,9 @@ poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP1]]1, [2 x <4 x i16>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly16x4x2_t [[TMP1]]2 +// CHECK: ret %struct.poly16x4x2_t [[TMP12]] poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { return vuzp_p16(a, b); } @@ -1162,9 +1162,9 @@ int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP1]]1, [2 x <8 x i16>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int16x8x2_t [[TMP1]]2 +// CHECK: ret %struct.int16x8x2_t [[TMP12]] int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { return vuzpq_s16(a, b); } @@ -1189,9 +1189,9 @@ int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i32>] [[TMP1]]1, [2 x <4 x i32>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int32x4x2_t [[TMP1]]2 +// CHECK: ret %struct.int32x4x2_t [[TMP12]] int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { return vuzpq_s32(a, b); } @@ -1239,9 +1239,9 @@ uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP1]]1, [2 x <8 x i16>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load 
%struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint16x8x2_t [[TMP1]]2 +// CHECK: ret %struct.uint16x8x2_t [[TMP12]] uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { return vuzpq_u16(a, b); } @@ -1266,9 +1266,9 @@ uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i32>] [[TMP1]]1, [2 x <4 x i32>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint32x4x2_t [[TMP1]]2 +// CHECK: ret %struct.uint32x4x2_t [[TMP12]] uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { return vuzpq_u32(a, b); } @@ -1293,9 +1293,9 @@ uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x float>] [[TMP1]]1, [2 x <4 x float>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float32x4x2_t [[TMP1]]2 +// CHECK: ret %struct.float32x4x2_t [[TMP12]] float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { return vuzpq_f32(a, b); } @@ -1343,9 +1343,9 @@ poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP1]]1, [2 x <8 x i16>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly16x8x2_t [[TMP1]]2 +// CHECK: ret %struct.poly16x8x2_t [[TMP12]] poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { return vuzpq_p16(a, b); } @@ -1395,9 +1395,9 @@ int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP1]]1, [2 x <4 x i16>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int16x4x2_t [[TMP1]]2 +// CHECK: ret %struct.int16x4x2_t [[TMP12]] int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { return vzip_s16(a, b); } @@ -1422,9 +1422,9 @@ int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP9:%.*]] = load 
%struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x i32>] [[TMP1]]1, [2 x <2 x i32>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int32x2x2_t [[TMP1]]2 +// CHECK: ret %struct.int32x2x2_t [[TMP12]] int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) { return vzip_s32(a, b); } @@ -1472,9 +1472,9 @@ uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP1]]1, [2 x <4 x i16>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint16x4x2_t [[TMP1]]2 +// CHECK: ret %struct.uint16x4x2_t [[TMP12]] uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { return vzip_u16(a, b); } @@ -1499,9 +1499,9 @@ uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x i32>] [[TMP1]]1, [2 x <2 x i32>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint32x2x2_t [[TMP1]]2 +// CHECK: ret %struct.uint32x2x2_t [[TMP12]] uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { return vzip_u32(a, b); } @@ -1526,9 +1526,9 @@ uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x float>] [[TMP1]]1, [2 x <2 x float>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float32x2x2_t [[TMP1]]2 +// CHECK: ret %struct.float32x2x2_t [[TMP12]] float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) { return vzip_f32(a, b); } @@ -1576,9 +1576,9 @@ poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP1]]1, [2 x <4 x i16>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <4 x i16>] 
[[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly16x4x2_t [[TMP1]]2 +// CHECK: ret %struct.poly16x4x2_t [[TMP12]] poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { return vzip_p16(a, b); } @@ -1626,9 +1626,9 @@ int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP1]]1, [2 x <8 x i16>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int16x8x2_t [[TMP1]]2 +// CHECK: ret %struct.int16x8x2_t [[TMP12]] int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { return vzipq_s16(a, b); } @@ -1653,9 +1653,9 @@ int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i32>] [[TMP1]]1, [2 x <4 x i32>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int32x4x2_t [[TMP1]]2 +// CHECK: ret %struct.int32x4x2_t [[TMP12]] int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { return vzipq_s32(a, b); } @@ -1703,9 +1703,9 @@ uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP1]]1, [2 x <8 x i16>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint16x8x2_t [[TMP1]]2 +// CHECK: ret %struct.uint16x8x2_t [[TMP12]] uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { return vzipq_u16(a, b); } @@ -1730,9 +1730,9 @@ uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i32>] [[TMP1]]1, [2 x <4 x i32>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint32x4x2_t [[TMP1]]2 +// CHECK: ret %struct.uint32x4x2_t [[TMP12]] uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { return vzipq_u32(a, b); } @@ -1757,9 +1757,9 @@ uint32x4x2_t test_vzipq_u32(uint32x4_t a, 
uint32x4_t b) { // CHECK: [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x float>] [[TMP1]]1, [2 x <4 x float>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float32x4x2_t [[TMP1]]2 +// CHECK: ret %struct.float32x4x2_t [[TMP12]] float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { return vzipq_f32(a, b); } @@ -1807,9 +1807,9 @@ poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP1]]1, [2 x <8 x i16>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly16x8x2_t [[TMP1]]2 +// CHECK: ret %struct.poly16x8x2_t [[TMP12]] poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) { return vzipq_p16(a, b); } @@ -1859,9 +1859,9 @@ int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP1]]1, [2 x <4 x i16>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int16x4x2_t [[TMP1]]2 +// CHECK: ret %struct.int16x4x2_t [[TMP12]] int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { return vtrn_s16(a, b); } @@ -1886,9 +1886,9 @@ int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x i32>] [[TMP1]]1, [2 x <2 x i32>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.int32x2x2_t [[TMP1]]2 +// CHECK: ret %struct.int32x2x2_t [[TMP12]] int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { return vtrn_s32(a, b); } @@ -1936,9 +1936,9 @@ uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP1]]1, [2 x <4 x i16>]* [[TMP1]]0, align 8 
+// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint16x4x2_t [[TMP1]]2 +// CHECK: ret %struct.uint16x4x2_t [[TMP12]] uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { return vtrn_u16(a, b); } @@ -1963,9 +1963,9 @@ uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x i32>] [[TMP1]]1, [2 x <2 x i32>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <2 x i32>] [[TMP11]], [2 x <2 x i32>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.uint32x2x2_t [[TMP1]]2 +// CHECK: ret %struct.uint32x2x2_t [[TMP12]] uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { return vtrn_u32(a, b); } @@ -1990,9 +1990,9 @@ uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { // CHECK: [[TMP9:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x2x2_t [[TMP9]], 0 -// CHECK: store [2 x <2 x float>] [[TMP1]]1, [2 x <2 x float>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <2 x float>] [[TMP11]], [2 x <2 x float>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.float32x2x2_t [[TMP1]]2 +// CHECK: ret %struct.float32x2x2_t [[TMP12]] float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { return vtrn_f32(a, b); } @@ -2040,9 +2040,9 @@ poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i16>] [[TMP1]]1, [2 x <4 x i16>]* [[TMP1]]0, align 8 +// CHECK: store [2 x <4 x i16>] [[TMP11]], [2 x <4 x i16>]* [[TMP10]], align 8 // CHECK: [[TMP12:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8 -// CHECK: ret %struct.poly16x4x2_t [[TMP1]]2 +// CHECK: ret %struct.poly16x4x2_t [[TMP12]] poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { return vtrn_p16(a, b); } @@ -2090,9 +2090,9 @@ int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP1]]1, [2 x <8 x i16>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int16x8x2_t [[TMP1]]2 +// CHECK: ret %struct.int16x8x2_t [[TMP12]] int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { return vtrnq_s16(a, b); } @@ -2117,9 +2117,9 @@ 
int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.int32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i32>] [[TMP1]]1, [2 x <4 x i32>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.int32x4x2_t [[TMP1]]2 +// CHECK: ret %struct.int32x4x2_t [[TMP12]] int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { return vtrnq_s32(a, b); } @@ -2167,9 +2167,9 @@ uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP9]], 0 -// CHECK: store [2 x <8 x i16>] [[TMP1]]1, [2 x <8 x i16>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint16x8x2_t [[TMP1]]2 +// CHECK: ret %struct.uint16x8x2_t [[TMP12]] uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { return vtrnq_u16(a, b); } @@ -2194,9 +2194,9 @@ uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { // CHECK: [[TMP9:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x i32>] [[TMP1]]1, [2 x <4 x i32>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <4 x i32>] [[TMP11]], [2 x <4 x i32>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.uint32x4x2_t [[TMP1]]2 +// CHECK: ret %struct.uint32x4x2_t [[TMP12]] uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { return vtrnq_u32(a, b); } @@ -2221,9 +2221,9 @@ uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { // CHECK: [[TMP9:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.float32x4x2_t [[TMP9]], 0 -// CHECK: store [2 x <4 x float>] [[TMP1]]1, [2 x <4 x float>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <4 x float>] [[TMP11]], [2 x <4 x float>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.float32x4x2_t [[TMP1]]2 +// CHECK: ret %struct.float32x4x2_t [[TMP12]] float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { return vtrnq_f32(a, b); } @@ -2271,9 +2271,9 @@ poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { // CHECK: [[TMP9:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16 // CHECK: [[TMP10:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0 // CHECK: [[TMP11:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP9]], 0 
-// CHECK: store [2 x <8 x i16>] [[TMP1]]1, [2 x <8 x i16>]* [[TMP1]]0, align 16 +// CHECK: store [2 x <8 x i16>] [[TMP11]], [2 x <8 x i16>]* [[TMP10]], align 16 // CHECK: [[TMP12:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16 -// CHECK: ret %struct.poly16x8x2_t [[TMP1]]2 +// CHECK: ret %struct.poly16x8x2_t [[TMP12]] poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { return vtrnq_p16(a, b); } diff --git a/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c b/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c index b7f0b2a00d..50ed582614 100644 --- a/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c +++ b/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c @@ -104,7 +104,7 @@ float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) { // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> // CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> [[TMP3]], i32 0 -// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE]]6) #2 +// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]]) #2 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8> // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0 @@ -196,8 +196,8 @@ float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) { // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> // CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA]]1) -// CHECK: ret <1 x double> [[FMLA]]2 +// CHECK: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]]) +// CHECK: ret <1 x double> [[FMLA2]] float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { return vfma_lane_f64(a, b, v, 0); } @@ -211,8 +211,8 @@ float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> // CHECK: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA]]1) -// CHECK: ret <1 x double> [[FMLA]]2 +// CHECK: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]]) +// CHECK: ret <1 x double> [[FMLA2]] float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { return vfms_lane_f64(a, b, v, 0); } @@ -398,8 +398,8 @@ int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) { // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0 // CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) // CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0 -// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE]]0) -// CHECK: ret i32 [[VQDMLXL]]1 +// CHECK: [[VQDMLXL1:%.*]] = call i32 
@llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0]]) +// CHECK: ret i32 [[VQDMLXL1]] int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) { return vqdmlalh_lane_s16(a, b, c, 3); } @@ -410,7 +410,7 @@ int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) { // CHECK: [[LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 // CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]]) // CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]]) -// CHECK: ret i64 [[VQDMLXL]]1 +// CHECK: ret i64 [[VQDMLXL1]] int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) { return vqdmlals_lane_s32(a, b, c, 1); } @@ -423,8 +423,8 @@ int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) { // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0 // CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) // CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0 -// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE]]0) -// CHECK: ret i32 [[VQDMLXL]]1 +// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0]]) +// CHECK: ret i32 [[VQDMLXL1]] int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) { return vqdmlalh_laneq_s16(a, b, c, 7); } @@ -435,7 +435,7 @@ int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) { // CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 // CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]]) // CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]]) -// CHECK: ret i64 [[VQDMLXL]]1 +// CHECK: ret i64 [[VQDMLXL1]] int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) { return vqdmlals_laneq_s32(a, b, c, 3); } @@ -448,8 +448,8 @@ int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) { // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0 // CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) // CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0 -// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE]]0) -// CHECK: ret i32 [[VQDMLXL]]1 +// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0]]) +// CHECK: ret i32 [[VQDMLXL1]] int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) { return vqdmlslh_lane_s16(a, b, c, 3); } @@ -460,7 +460,7 @@ int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) { // CHECK: [[LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 // CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]]) // CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]]) -// CHECK: ret i64 [[VQDMLXL]]1 +// CHECK: ret i64 [[VQDMLXL1]] int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) { return vqdmlsls_lane_s32(a, b, c, 1); } @@ -473,8 +473,8 @@ int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) { // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0 // CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) // CHECK: [[LANE0:%.*]] = extractelement <4 x i32> [[VQDMLXL]], i64 0 -// CHECK: [[VQDMLXL1:%.*]] = call i32 
@llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE]]0) -// CHECK: ret i32 [[VQDMLXL]]1 +// CHECK: [[VQDMLXL1:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0]]) +// CHECK: ret i32 [[VQDMLXL1]] int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) { return vqdmlslh_laneq_s16(a, b, c, 7); } @@ -485,7 +485,7 @@ int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) { // CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 // CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]]) // CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]]) -// CHECK: ret i64 [[VQDMLXL]]1 +// CHECK: ret i64 [[VQDMLXL1]] int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) { return vqdmlsls_laneq_s32(a, b, c, 3); } @@ -499,7 +499,7 @@ int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) { // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP1]] to <8 x i8> // CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> // CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP5]], i32 0 -// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE]]7) #2 +// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]]) #2 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8> // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0 diff --git a/test/CodeGen/aarch64-neon-tbl.c b/test/CodeGen/aarch64-neon-tbl.c index 9a6a397be9..0cc66453ac 100644 --- a/test/CodeGen/aarch64-neon-tbl.c +++ b/test/CodeGen/aarch64-neon-tbl.c @@ -26,15 +26,15 @@ int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0 // CHECK: store [2 x <8 x i8>] [[A]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx.i, align 8 +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx2.i, align 8 +// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL13_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2 // CHECK: ret <8 x i8> [[VTBL13_I]] @@ -48,15 +48,15 @@ int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0 // CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2 // CHECK: ret <8 x i8> [[VTBL2_I]] int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) { @@ -69,18 +69,18 @@ int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0 // CHECK: store [3 x <8 x i8>] [[A]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx.i, align 8 +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx2.i, align 8 +// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x 
i8>* [[A]]rrayidx2.i, align 8 +// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx4.i, align 8 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2 @@ -95,18 +95,18 @@ int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0 // CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2 // CHECK: ret <8 x i8> [[VTBL3_I]] int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) { @@ -119,21 +119,21 @@ int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0 // CHECK: store [4 x <8 x i8>] [[A]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]*
[[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx.i, align 8 +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx2.i, align 8 +// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx4.i, align 8 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx6.i, align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> // CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2 @@ -148,21 +148,21 @@ int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0 // CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: 
[[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx6.i, align 16 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2 // CHECK: ret <8 x i8> [[VTBL4_I]] int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) { @@ -182,15 +182,15 @@ int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0 // CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2 // CHECK: ret <16 x i8> [[VTBL2_I]] int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) { @@ -203,18 +203,18 @@ int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0 // CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, 
%struct.int8x16x3_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 // CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2 // CHECK: ret <16 x i8> [[VTBL3_I]] int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) { @@ -227,21 +227,21 @@ int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0 // CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 
x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx6.i, align 16 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 // CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2 // CHECK: ret <16 x i8> [[VTBL4_I]] int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) { @@ -268,9 +268,9 @@ int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 @@ -290,9 +290,9 @@ int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 @@ -322,9 +322,9 @@ int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds 
%struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 @@ -358,9 +358,9 @@ int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -379,9 +379,9 @@ int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -403,9 +403,9 @@ int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr 
inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -437,9 +437,9 @@ int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -458,9 +458,9 @@ int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -482,9 +482,9 @@ int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t 
c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -524,15 +524,15 @@ uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0 // CHECK: store [2 x <8 x i8>] [[A]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx.i, align 8 +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx2.i, align 8 +// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2 // CHECK: ret <8 x i8> [[VTBL13_I]] @@ -546,15 +546,15 @@ uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0 // CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* 
[[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2 // CHECK: ret <8 x i8> [[VTBL2_I]] uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) { @@ -567,18 +567,18 @@ uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0 // CHECK: store [3 x <8 x i8>] [[A]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx.i, align 8 +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx2.i, align 8 +// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx4.i, align 8 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> // CHECK: [[VTBL26_I:%.*]] = 
call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2 @@ -593,18 +593,18 @@ uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0 // CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 // CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2 // CHECK: ret <8 x i8> [[VTBL3_I]] uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) { @@ -617,21 +617,21 @@ uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0 // CHECK: store [4 x <8 x i8>] [[A]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* 
[[A]]rrayidx.i, align 8 +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx2.i, align 8 +// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx4.i, align 8 +// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3 -// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx6.i, align 8 +// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8 // CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> // CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2 @@ -646,21 +646,21 @@ uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0 // CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16 +// CHECK: 
[[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx6.i, align 16 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 // CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2 // CHECK: ret <8 x i8> [[VTBL4_I]] uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) { @@ -680,15 +680,15 @@ uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0 // CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2 // CHECK: ret <16 x i8> [[VTBL2_I]] uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) { @@ -701,18 +701,18 @@ uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0 // CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr 
inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 // CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2 // CHECK: ret <16 x i8> [[VTBL3_I]] uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) { @@ -725,21 +725,21 @@ uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0 // CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16 +// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16 +// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16 // CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2 -// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16 +// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16 // CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0 
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3 -// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx6.i, align 16 +// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16 // CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2 // CHECK: ret <16 x i8> [[VTBL4_I]] uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) { @@ -766,9 +766,9 @@ uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 // CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 @@ -788,9 +788,9 @@ uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 // CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 @@ -820,9 +820,9 @@ uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 // CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds 
%struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 @@ -856,9 +856,9 @@ uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -877,9 +877,9 @@ uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -901,9 +901,9 @@ uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 
16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -935,9 +935,9 @@ uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 // CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -956,9 +956,9 @@ uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 // CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -980,9 +980,9 @@ uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 // CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// 
CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0 -// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0 // CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16 @@ -1022,15 +1022,15 @@ poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0 // CHECK: store [2 x <8 x i8>] [[A]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE]]1, align 8 +// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE]].i, align 8 +// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0 -// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx.i, align 8 +// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8 // CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0 // CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1 -// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx2.i, align 8 +// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8 // CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> // CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2 // CHECK: ret <8 x i8> [[VTBL13_I]] @@ -1044,15 +1044,15 @@ poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0 // CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16 // CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0 -// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16 +// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16 // CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0 -// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16 +// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16 // CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* 
[[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
// CHECK: ret <8 x i8> [[VTBL2_I]]
poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
@@ -1065,18 +1065,18 @@ poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[A]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE]]1, align 8
+// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE]].i, align 8
+// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx.i, align 8
+// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx2.i, align 8
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx4.i, align 8
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32>
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
@@ -1091,18 +1091,18 @@ poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
// CHECK: ret <8 x i8> [[VTBL3_I]]
poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
@@ -1115,21 +1115,21 @@ poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[A]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE]]1, align 8
+// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE]].i, align 8
+// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx.i, align 8
+// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx2.i, align 8
+// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx4.i, align 8
+// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[A]]rrayidx6.i, align 8
+// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32>
// CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32>
// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
@@ -1144,21 +1144,21 @@ poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx6.i, align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
// CHECK: ret <8 x i8> [[VTBL4_I]]
poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
@@ -1178,15 +1178,15 @@ poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[A]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
// CHECK: ret <16 x i8> [[VTBL2_I]]
poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
@@ -1199,18 +1199,18 @@ poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[A]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
// CHECK: ret <16 x i8> [[VTBL3_I]]
poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
@@ -1223,21 +1223,21 @@ poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[A]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
-// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx.i, align 16
+// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
-// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx2.i, align 16
+// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
-// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx4.i, align 16
+// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
-// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[A]]rrayidx6.i, align 16
+// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
// CHECK: ret <16 x i8> [[VTBL4_I]]
poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
@@ -1264,9 +1264,9 @@ poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE]]1, align 8
+// CHECK: [[TMP0:%.*]] = load [2 x <8 x i8>], [2 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE]].i, align 8
+// CHECK: store [2 x <8 x i8>] [[TMP0]], [2 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
@@ -1286,9 +1286,9 @@ poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE]]1, align 8
+// CHECK: [[TMP0:%.*]] = load [3 x <8 x i8>], [3 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE]].i, align 8
+// CHECK: store [3 x <8 x i8>] [[TMP0]], [3 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
@@ -1318,9 +1318,9 @@ poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE]]1, align 8
+// CHECK: [[TMP0:%.*]] = load [4 x <8 x i8>], [4 x <8 x i8>]* [[COERCE_DIVE1]], align 8
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE]].i, align 8
+// CHECK: store [4 x <8 x i8>] [[TMP0]], [4 x <8 x i8>]* [[COERCE_DIVE_I]], align 8
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
@@ -1354,9 +1354,9 @@ poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
@@ -1375,9 +1375,9 @@ poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
@@ -1399,9 +1399,9 @@ poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
@@ -1433,9 +1433,9 @@ poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[B]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [2 x <16 x i8>], [2 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
-// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [2 x <16 x i8>] [[TMP0]], [2 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
@@ -1454,9 +1454,9 @@ poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [3 x <16 x i8>], [3 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
-// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [3 x <16 x i8>] [[TMP0]], [3 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
@@ -1478,9 +1478,9 @@ poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
-// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE]]1, align 16
+// CHECK: [[TMP0:%.*]] = load [4 x <16 x i8>], [4 x <16 x i8>]* [[COERCE_DIVE1]], align 16
// CHECK: [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
-// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE]].i, align 16
+// CHECK: store [4 x <16 x i8>] [[TMP0]], [4 x <16 x i8>]* [[COERCE_DIVE_I]], align 16
// CHECK: [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL_I]], i64 0, i64 0
// CHECK: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX_I]], align 16
diff --git a/test/CodeGen/aarch64-poly64.c b/test/CodeGen/aarch64-poly64.c
index 51084b0759..762ca94e5e 100644
--- a/test/CodeGen/aarch64-poly64.c
+++ b/test/CodeGen/aarch64-poly64.c
@@ -375,12 +375,12 @@ poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 16, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]1, i64 0, i64 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]]2, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]3, align 8
+// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL2]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
@@ -400,12 +400,12 @@ void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]1, i64 0, i64 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL]]2, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]3, align 16
+// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL2]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
@@ -425,21 +425,21 @@ void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 24, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]1, i64 0, i64 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]2, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]3, align 8
+// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL2]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]]4, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]5, align 8
+// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL4]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP1]]0, <1 x i64> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
  return vst3_p64(ptr, val);
@@ -455,21 +455,21 @@ void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 48, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]1, i64 0, i64 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]2, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]3, align 16
+// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL2]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL]]4, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]5, align 16
+// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL4]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP1]]0, <2 x i64> [[TMP1]]1, i8* [[TMP2]])
+// CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
// CHECK: ret void
void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
  return vst3q_p64(ptr, val);
@@ -485,26 +485,26 @@ void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 32, i32 8, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]1, i64 0, i64 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]2, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]3, align 8
+// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL2]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]4, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]5, align 8
+// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL4]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]]6, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]]7, align 8
+// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL6]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX7]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP1]]0 to <1 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP1]]1, <1 x i64> [[TMP1]]2, <1 x i64> [[TMP1]]3, <1 x i64> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
  return vst4_p64(ptr, val);
@@ -520,26 +520,26 @@ void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP0]], i8* [[TMP1]], i64 64, i32 16, i1 false)
// CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]1, i64 0, i64 0
+// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]2, i64 0, i64 1
-// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]3, align 16
+// CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL2]], i64 0, i64 1
+// CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]4, i64 0, i64 2
-// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]5, align 16
+// CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL4]], i64 0, i64 2
+// CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
-// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL]]6, i64 0, i64 3
-// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]]7, align 16
+// CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL6]], i64 0, i64 3
+// CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX7]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
-// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP1]]0 to <2 x i64>
-// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP1]]1, <2 x i64> [[TMP1]]2, <2 x i64> [[TMP1]]3, <2 x i64> [[TMP1]]4, i8* [[TMP2]])
+// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
+// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
// CHECK: ret void
void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) {
  return vst4q_p64(ptr, val);
@@ -615,8 +615,8 @@ poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
-// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N]]1, i32 33)
-// CHECK: ret <1 x i64> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 33)
+// CHECK: ret <1 x i64> [[VSRI_N2]]
poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {
  return vsri_n_p64(a, b, 33);
}
@@ -626,8 +626,8 @@ poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
-// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N]]1, i32 64)
-// CHECK: ret <2 x i64> [[VSRI_N]]2
+// CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 64)
+// CHECK: ret <2 x i64> [[VSRI_N2]]
poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) {
  return vsriq_n_p64(a, b, 64);
}
diff --git a/test/CodeGen/arm64_vdupq_n_f64.c b/test/CodeGen/arm64_vdupq_n_f64.c
index 8e9f7a4ed8..58cc7f020e 100644
--- a/test/CodeGen/arm64_vdupq_n_f64.c
+++ b/test/CodeGen/arm64_vdupq_n_f64.c
@@ -48,9 +48,9 @@ float64x2_t test_vmovq_n_f64(float64_t w) {
// CHECK: [[TMP0:%.*]] = load half, half* %a1, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
-// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT]]1, half [[TMP0]], i32 2
-// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT]]2, half [[TMP0]], i32 3
-// CHECK: ret <4 x half> [[VECINIT]]3
+// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
+// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
+// CHECK: ret <4 x half> [[VECINIT3]]
float16x4_t test_vmov_n_f16(float16_t *a1) {
  return vmov_n_f16(*a1);
}
@@ -65,13 +65,13 @@ float64x1_t test_vmov_n_f64(float64_t a1) {
// CHECK: [[TMP0:%.*]] = load half, half* %a1, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
-// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT]]1, half [[TMP0]], i32 2
-// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT]]2, half [[TMP0]], i32 3
-// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT]]3, half [[TMP0]], i32 4
-// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT]]4, half [[TMP0]], i32 5
-// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT]]5, half [[TMP0]], i32 6
-// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT]]6, half [[TMP0]], i32 7
-// CHECK: ret <8 x half> [[VECINIT]]7
+// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
+// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
+// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
+// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
+// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
+// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
+// CHECK: ret <8 x half> [[VECINIT7]]
float16x8_t test_vmovq_n_f16(float16_t *a1) {
  return vmovq_n_f16(*a1);
}
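The fix applied throughout the hunks above is FileCheck's variable-capture idiom: a value defined as [[VAL1:%.*]] must be referenced as [[VAL1]]. The mis-converted form [[VAL]]1 expands to whatever text [[VAL]] happened to capture followed by a literal 1, so it only matches when the compiler picks those exact IR value names, which is precisely the name-dependence the %.* wildcards exist to avoid. A minimal, hypothetical sketch of the idiom in the same style as these tests (illustrative only, not a test from this commit; the RUN line, function, and CHECK patterns are assumptions):

// RUN: %clang_cc1 -triple arm64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s
// Capture each IR value once where it is defined, then reuse the whole
// capture; [[SUM1]] stays correct however the value is actually named.
// CHECK-LABEL: @add_three(
// CHECK: [[SUM:%.*]] = add{{.*}} i32
// CHECK: [[SUM1:%.*]] = add{{.*}} i32 [[SUM]],
// CHECK: ret i32 [[SUM1]]
int add_three(int x, int y, int z) {
  return (x + y) + z;
}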