}
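// Scalar fused multiply-add/subtract by lane: the expected IR is a single
// extractelement from the vector operand feeding @llvm.fma.f32/f64 (the
// vfms* forms first negate %b with an fsub from -0.0), with no bitcast
// round-trip through a byte vector.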
// CHECK-LABEL: @test_vfmas_laneq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> %v, i32 3
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
// CHECK: ret float [[TMP2]]
float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) {
// CHECK-LABEL: @test_vfmsd_lane_f64(
// CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %v to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
+// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> %v, i32 0
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a)
// CHECK: ret double [[TMP2]]
float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) {
// CHECK-LABEL: @test_vfmss_laneq_f32(
// CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> %v, i32 3
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
// CHECK: ret float [[TMP2]]
float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) {
// CHECK-LABEL: @test_vfmsd_laneq_f64(
// CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> %v, i32 1
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a)
// CHECK: ret double [[TMP2]]
float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) {
}
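// Across-vector reductions: each intrinsic reduces to one call of the
// matching @llvm.aarch64.neon.*v intrinsic on the unmodified argument,
// except the two-lane float/double pairwise adds, which lower to two
// extractelements and an fadd.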
// CHECK-LABEL: @test_vpaddd_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VPADDD_S64_I]]
int64_t test_vpaddd_s64(int64x2_t a) {
}
// CHECK-LABEL: @test_vpadds_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]]
}
// CHECK-LABEL: @test_vpaddd_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0
// CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1
// CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]]
}
// CHECK-LABEL: @test_vpmaxnms_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMAXNMS_F32_I]]
float32_t test_vpmaxnms_f32(float32x2_t a) {
}
// CHECK-LABEL: @test_vpmaxnmqd_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMAXNMQD_F64_I]]
float64_t test_vpmaxnmqd_f64(float64x2_t a) {
}
// CHECK-LABEL: @test_vpmaxs_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMAXS_F32_I]]
float32_t test_vpmaxs_f32(float32x2_t a) {
}
// CHECK-LABEL: @test_vpmaxqd_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMAXQD_F64_I]]
float64_t test_vpmaxqd_f64(float64x2_t a) {
}
// CHECK-LABEL: @test_vpminnms_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMINNMS_F32_I]]
float32_t test_vpminnms_f32(float32x2_t a) {
}
// CHECK-LABEL: @test_vpminnmqd_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMINNMQD_F64_I]]
float64_t test_vpminnmqd_f64(float64x2_t a) {
}
// CHECK-LABEL: @test_vpmins_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VPMINS_F32_I]]
float32_t test_vpmins_f32(float32x2_t a) {
}
// CHECK-LABEL: @test_vpminqd_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VPMINQD_F64_I]]
float64_t test_vpminqd_f64(float64x2_t a) {
}
// CHECK-LABEL: @test_vaddv_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VADDV_F32_I]]
float32_t test_vaddv_f32(float32x2_t a) {
}
// CHECK-LABEL: @test_vaddvq_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
// CHECK: ret float [[VADDVQ_F32_I]]
float32_t test_vaddvq_f32(float32x4_t a) {
}
// CHECK-LABEL: @test_vaddvq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VADDVQ_F64_I]]
float64_t test_vaddvq_f64(float64x2_t a) {
}
// CHECK-LABEL: @test_vmaxv_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMAXV_F32_I]]
float32_t test_vmaxv_f32(float32x2_t a) {
}
// CHECK-LABEL: @test_vmaxvq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMAXVQ_F64_I]]
float64_t test_vmaxvq_f64(float64x2_t a) {
}
// CHECK-LABEL: @test_vminv_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMINV_F32_I]]
float32_t test_vminv_f32(float32x2_t a) {
}
// CHECK-LABEL: @test_vminvq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMINVQ_F64_I]]
float64_t test_vminvq_f64(float64x2_t a) {
}
// CHECK-LABEL: @test_vmaxnmvq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMAXNMVQ_F64_I]]
float64_t test_vmaxnmvq_f64(float64x2_t a) {
}
// CHECK-LABEL: @test_vmaxnmv_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMAXNMV_F32_I]]
float32_t test_vmaxnmv_f32(float32x2_t a) {
}
// CHECK-LABEL: @test_vminnmvq_f64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
// CHECK: ret double [[VMINNMVQ_F64_I]]
float64_t test_vminnmvq_f64(float64x2_t a) {
}
// CHECK-LABEL: @test_vminnmv_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
// CHECK: ret float [[VMINNMV_F32_I]]
float32_t test_vminnmv_f32(float32x2_t a) {
}
// CHECK-LABEL: @test_vpaddd_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VPADDD_U64_I]]
uint64_t test_vpaddd_u64(uint64x2_t a) {
}
// CHECK-LABEL: @test_vaddvq_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VADDVQ_S64_I]]
int64_t test_vaddvq_s64(int64x2_t a) {
}
// CHECK-LABEL: @test_vaddvq_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
// CHECK: ret i64 [[VADDVQ_U64_I]]
uint64_t test_vaddvq_u64(uint64x2_t a) {
}
// CHECK-LABEL: @test_vminv_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMINV_S32_I]]
int32_t test_vminv_s32(int32x2_t a) {
}
// CHECK-LABEL: @test_vminv_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMINV_U32_I]]
uint32_t test_vminv_u32(uint32x2_t a) {
}
// CHECK-LABEL: @test_vmaxv_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMAXV_S32_I]]
int32_t test_vmaxv_s32(int32x2_t a) {
}
// CHECK-LABEL: @test_vmaxv_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VMAXV_U32_I]]
uint32_t test_vmaxv_u32(uint32x2_t a) {
}
// CHECK-LABEL: @test_vaddv_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VADDV_S32_I]]
int32_t test_vaddv_s32(int32x2_t a) {
}
// CHECK-LABEL: @test_vaddv_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
// CHECK: ret i32 [[VADDV_U32_I]]
uint32_t test_vaddv_u32(uint32x2_t a) {
}
// CHECK-LABEL: @test_vaddlv_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
// CHECK: ret i64 [[VADDLV_S32_I]]
int64_t test_vaddlv_s32(int32x2_t a) {
}
// CHECK-LABEL: @test_vaddlv_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
// CHECK: ret i64 [[VADDLV_U32_I]]
uint64_t test_vaddlv_u32(uint32x2_t a) {
#include <arm_neon.h>
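// Lane reads into scalars: vdup*_lane[q] (and the vget/vmul/vfma lane forms
// further below) should produce a bare extractelement at the constant index.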
// CHECK-LABEL: define float @test_vdups_lane_f32(<2 x float> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VDUPS_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+// CHECK: [[VDUPS_LANE:%.*]] = extractelement <2 x float> %a, i32 1
// CHECK: ret float [[VDUPS_LANE]]
float32_t test_vdups_lane_f32(float32x2_t a) {
return vdups_lane_f32(a, 1);
// CHECK-LABEL: define double @test_vdupd_lane_f64(<1 x double> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VDUPD_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
+// CHECK: [[VDUPD_LANE:%.*]] = extractelement <1 x double> %a, i32 0
// CHECK: ret double [[VDUPD_LANE]]
float64_t test_vdupd_lane_f64(float64x1_t a) {
return vdupd_lane_f64(a, 0);
// CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a, i32 3
// CHECK: ret float [[VGETQ_LANE]]
float32_t test_vdups_laneq_f32(float32x4_t a) {
return vdups_laneq_f32(a, 3);
// CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %a, i32 1
// CHECK: ret double [[VGETQ_LANE]]
float64_t test_vdupd_laneq_f64(float64x2_t a) {
return vdupd_laneq_f64(a, 1);
// CHECK-LABEL: define i16 @test_vduph_lane_s16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vduph_lane_s16(int16x4_t a) {
return vduph_lane_s16(a, 3);
// CHECK-LABEL: define i32 @test_vdups_lane_s32(<2 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vdups_lane_s32(int32x2_t a) {
return vdups_lane_s32(a, 1);
// CHECK-LABEL: define i64 @test_vdupd_lane_s64(<1 x i64> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vdupd_lane_s64(int64x1_t a) {
return vdupd_lane_s64(a, 0);
// CHECK-LABEL: define i16 @test_vduph_lane_u16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vduph_lane_u16(uint16x4_t a) {
return vduph_lane_u16(a, 3);
// CHECK-LABEL: define i32 @test_vdups_lane_u32(<2 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vdups_lane_u32(uint32x2_t a) {
return vdups_lane_u32(a, 1);
// CHECK-LABEL: define i64 @test_vdupd_lane_u64(<1 x i64> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vdupd_lane_u64(uint64x1_t a) {
return vdupd_lane_u64(a, 0);
// CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
int16_t test_vduph_laneq_s16(int16x8_t a) {
return vduph_laneq_s16(a, 7);
// CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK: ret i32 [[VGETQ_LANE]]
int32_t test_vdups_laneq_s32(int32x4_t a) {
return vdups_laneq_s32(a, 3);
// CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
int64_t test_vdupd_laneq_s64(int64x2_t a) {
return vdupd_laneq_s64(a, 1);
// CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
uint16_t test_vduph_laneq_u16(uint16x8_t a) {
return vduph_laneq_u16(a, 7);
// CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK: ret i32 [[VGETQ_LANE]]
uint32_t test_vdups_laneq_u32(uint32x4_t a) {
return vdups_laneq_u32(a, 3);
// CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
uint64_t test_vdupd_laneq_u64(uint64x2_t a) {
return vdupd_laneq_u64(a, 1);
}
// CHECK-LABEL: define i16 @test_vduph_lane_p16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vduph_lane_p16(poly16x4_t a) {
return vduph_lane_p16(a, 3);
}
// CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
poly16_t test_vduph_laneq_p16(poly16x8_t a) {
return vduph_laneq_p16(a, 7);
// CHECK-LABEL: define float @test_vmuls_lane_f32(float %a, <2 x float> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1
// CHECK: [[MUL:%.*]] = fmul float %a, [[VGET_LANE]]
// CHECK: ret float [[MUL]]
float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) {
}
// CHECK-LABEL: define double @test_vmuld_lane_f64(double %a, <1 x double> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0
// CHECK: [[MUL:%.*]] = fmul double %a, [[VGET_LANE]]
// CHECK: ret double [[MUL]]
float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) {
}
// CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3
// CHECK: [[MUL:%.*]] = fmul float %a, [[VGETQ_LANE]]
// CHECK: ret float [[MUL]]
float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) {
}
// CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
// CHECK: [[MUL:%.*]] = fmul double %a, [[VGETQ_LANE]]
// CHECK: ret double [[MUL]]
float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
}
// CHECK-LABEL: define float @test_vmulxs_lane_f32(float %a, <2 x float> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]])
// CHECK: ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) {
}
// CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3
// CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]])
// CHECK: ret float [[VMULXS_F32_I]]
float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) {
}
// CHECK-LABEL: define double @test_vmulxd_lane_f64(double %a, <1 x double> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]])
// CHECK: ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) {
}
// CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]])
// CHECK: ret double [[VMULXD_F64_I]]
float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
}
// CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64(<1 x double> %a, <1 x double> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
-// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %b to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
+// CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> %b, i32 0
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]])
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
// CHECK: ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
return vmulx_lane_f64(a, b, 0);
// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
-// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 0
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
// CHECK: ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
return vmulx_laneq_f64(a, b, 0);
}
// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
-// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %b to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0
// CHECK: ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
return vmulx_laneq_f64(a, b, 1);
// CHECK-LABEL: define float @test_vfmas_lane_f32(float %a, float %b, <2 x float> %c) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %c to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
// CHECK: ret float [[TMP2]]
float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
}
// CHECK-LABEL: define double @test_vfmad_lane_f64(double %a, double %b, <1 x double> %c) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %c to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
-// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
+// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> %c, i32 0
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
// CHECK: ret double [[TMP2]]
float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
}
// CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %c to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> %c, i32 1
// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
// CHECK: ret double [[TMP2]]
float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
// CHECK-LABEL: define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %c) #0 {
// CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %c to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
// CHECK: ret float [[TMP2]]
float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
}
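// Saturating doubling multiplies: the 16-bit variants place both scalars
// into lane 0 of <4 x i16> undef vectors and call the vector
// @llvm.aarch64.neon.sqdmull/sqdmulh/sqrdmulh intrinsics; the 32-bit
// variants call the scalar sqdmulls.scalar/sqdmulh.i32/sqrdmulh.i32 forms
// directly.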
// CHECK-LABEL: define i32 @test_vqdmullh_lane_s16(i16 %a, <4 x i16> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
}
// CHECK-LABEL: define i64 @test_vqdmulls_lane_s32(i32 %a, <2 x i32> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]])
// CHECK: ret i64 [[VQDMULLS_S32_I]]
int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
}
// CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
// CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
}
// CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
// CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]])
// CHECK: ret i64 [[VQDMULLS_S32_I]]
int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) {
}
// CHECK-LABEL: define i16 @test_vqdmulhh_lane_s16(i16 %a, <4 x i16> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
}
// CHECK-LABEL: define i32 @test_vqdmulhs_lane_s32(i32 %a, <2 x i32> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]])
// CHECK: ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
// CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
// CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
// CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
// CHECK: ret i32 [[VQDMULHS_S32_I]]
int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) {
}
// CHECK-LABEL: define i16 @test_vqrdmulhh_lane_s16(i16 %a, <4 x i16> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
}
// CHECK-LABEL: define i32 @test_vqrdmulhs_lane_s32(i32 %a, <2 x i32> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]])
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
// CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0
// CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
// CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3
// CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]])
// CHECK: ret i32 [[VQRDMULHS_S32_I]]
int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) {
}
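// Saturating doubling multiply-accumulate: the 32-bit forms chain
// @llvm.aarch64.neon.sqdmulls.scalar into sqadd.i64 (vqdmlal) or sqsub.i64
// (vqdmlsl); the 16-bit forms widen through the v4i32 sqdmull shown above.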
// CHECK-LABEL: define i32 @test_vqdmlalh_lane_s16(i32 %a, i16 %b, <4 x i16> %c) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
}
// CHECK-LABEL: define i64 @test_vqdmlals_lane_s32(i64 %a, i32 %b, <2 x i32> %c) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]])
// CHECK: ret i64 [[VQDMLXL1]]
}
// CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
}
// CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]])
// CHECK: ret i64 [[VQDMLXL1]]
}
// CHECK-LABEL: define i32 @test_vqdmlslh_lane_s16(i32 %a, i16 %b, <4 x i16> %c) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %c to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
}
// CHECK-LABEL: define i64 @test_vqdmlsls_lane_s32(i64 %a, i32 %b, <2 x i32> %c) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %c to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]])
// CHECK: ret i64 [[VQDMLXL1]]
}
// CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7
// CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0
// CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0
// CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]])
}
// CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3
// CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]])
// CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]])
// CHECK: ret i64 [[VQDMLXL1]]
// CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64_0() #0 {
// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
-// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP1]] to <8 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
-// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP5]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
+// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]])
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
// CHECK: ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_lane_f64_0() {
float64x1_t arg1;
// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
-// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP3]], i32 0
-// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[SHUFFLE_I]] to <16 x i8>
-// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
// CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
-// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8>
-// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
// CHECK: ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_laneq_f64_2() {
float64x1_t arg1;
}
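// vget_lane/vgetq_lane: a plain extractelement on the argument vector; the
// f16 variants still round-trip through memory to reinterpret half lanes
// as i16.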
// CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
return vget_lane_u16(a, 3);
}
// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
return vget_lane_u32(a, 1);
}
// CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
return vget_lane_s16(a, 3);
}
// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
return vget_lane_s32(a, 1);
}
// CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
return vget_lane_p16(a, 3);
}
// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
// CHECK: ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
return vget_lane_f32(a, 1);
// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
}
// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
return vgetq_lane_u16(a, 7);
}
// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK: ret i32 [[VGETQ_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
return vgetq_lane_u32(a, 3);
}
// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
return vgetq_lane_s16(a, 7);
}
// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK: ret i32 [[VGETQ_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
return vgetq_lane_s32(a, 3);
}
// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGETQ_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
return vgetq_lane_p16(a, 7);
}
// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a, i32 3
// CHECK: ret float [[VGETQ_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
return vgetq_lane_f32(a, 3);
// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
// CHECK: store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
}
// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
return vget_lane_s64(a, 0);
}
// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
return vget_lane_u64(a, 0);
}
// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
return vgetq_lane_s64(a, 1);
}
// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
return vgetq_lane_u64(a, 1);
}
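// vset_lane/vsetq_lane: a single insertelement of the scalar into the
// argument vector at the constant lane.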
// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 %a, <4 x i16> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
return vset_lane_u16(a, b, 3);
}
// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
return vset_lane_u32(a, b, 1);
}
// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 %a, <4 x i16> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
return vset_lane_s16(a, b, 3);
}
// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
return vset_lane_s32(a, b, 1);
}
// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 %a, <4 x i16> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
return vset_lane_p16(a, b, 3);
}
// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
// CHECK: ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
return vset_lane_f32(a, b, 1);
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 3
// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
}
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
+// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
return vsetq_lane_u16(a, b, 7);
}
// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
return vsetq_lane_u32(a, b, 3);
}
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
+// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
return vsetq_lane_s16(a, b, 7);
}
// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
return vsetq_lane_s32(a, b, 3);
}
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
+// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
return vsetq_lane_p16(a, b, 7);
}
// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
// CHECK: ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
return vsetq_lane_f32(a, b, 3);
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 7
+// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 7
// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
}
// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
return vset_lane_s64(a, b, 0);
}
// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
return vset_lane_u64(a, b, 0);
}
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
return vsetq_lane_s64(a, b, 1);
}
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
return vsetq_lane_u64(a, b, 1);
}
// CHECK-LABEL: define i64 @test_vget_lane_p64(<1 x i64> %v) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %v, i32 0
// CHECK: ret i64 [[VGET_LANE]]
poly64_t test_vget_lane_p64(poly64x1_t v) {
return vget_lane_p64(v, 0);
}
// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %v, i32 1
// CHECK: ret i64 [[VGETQ_LANE]]
poly64_t test_vgetq_lane_p64(poly64x2_t v) {
return vgetq_lane_p64(v, 1);
}
// CHECK-LABEL: define <1 x i64> @test_vset_lane_p64(i64 %a, <1 x i64> %v) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %v, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
return vset_lane_p64(a, v, 0);
}
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %v, i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
return vsetq_lane_p64(a, v, 1);
}
// CHECK-LABEL: define <1 x i64> @test_vcopy_lane_p64(<1 x i64> %a, <1 x i64> %b) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
-// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %a, i64 [[VGET_LANE]], i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
return vcopy_lane_p64(a, 0, b, 0);
}
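// NOTE: vcopy_lane_p64(a, 0, b, 0) is checked as an extract of lane 0 of %b
// followed by an insert into lane 0 of %a, i.e. (assuming the usual
// arm_neon.h definition) it behaves like the composition
//
//   vset_lane_p64(vget_lane_p64(b, 0), a, 0);
//
// so the same bitcast cleanup applies on both the get and the set side, and
// likewise for the vcopyq_lane / vcopyq_laneq variants below.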
// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGET_LANE]], i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
return vcopyq_lane_p64(a, 1, b, 0);
}
// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 1
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %b, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGETQ_LANE]], i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
return vcopyq_laneq_p64(a, 1, b, 1);
}
// CHECK-LABEL: test_vfmah_lane_f16
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %c to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
+// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
// CHECK: ret half [[FMA]]
float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) {
}
// CHECK-LABEL: test_vfmah_laneq_f16
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %c to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
+// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a)
// CHECK: ret half [[FMA]]
float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
// CHECK: [[TMP0:%.*]] = fpext half %b to float
// CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]]
// CHECK: [[SUB:%.*]] = fptrunc float [[TMP1]] to half
-// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half>
-// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP3]], i32 3
+// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3
// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
// CHECK: ret half [[FMA]]
float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) {
// CHECK: [[TMP0:%.*]] = fpext half %b to float
// CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]]
// CHECK: [[SUB:%.*]] = fptrunc float [[TMP1]] to half
-// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half>
-// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP3]], i32 7
+// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7
// CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a)
// CHECK: ret half [[FMA]]
float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) {
}
// CHECK-LABEL: test_vmulxh_lane_f16
-// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
-// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
+// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %b, i32 3
// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]])
// CHECK: ret half [[MULX]]
float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) {
}
// CHECK-LABEL: test_vmulxh_laneq_f16
-// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
-// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
+// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %b, i32 7
// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]])
// CHECK: ret half [[MULX]]
float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
// CHECK: extractelement <8 x i8> %src, i32 2
// CHECK-BE-LABEL: @test_vdupb_lane_s8
- // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> {{.*}}, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> %src, <8 x i8> %src, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE: extractelement <8 x i8> [[REV]], i32 2
}
// CHECK: extractelement <8 x i8> %src, i32 2
// CHECK-BE-LABEL: @test_vdupb_lane_u8
- // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> {{.*}}, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+ // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> %src, <8 x i8> %src, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK-BE: extractelement <8 x i8> [[REV]], i32 2
}
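// NOTE: on big-endian AArch64 the in-register element order is the reverse
// of the little-endian lane numbering, so the IR first reverses the vector
// with a shufflevector and then extracts the requested lane from the
// reversed value: lane 2 of the reversed <8 x i8> is lane 5 (= 7 - 2) of
// %src.  The tightened CHECK-BE lines only pin the reversal to reading %src
// directly instead of an unconstrained {{.*}} operand.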
int16_t test_vduph_lane_s16(int16x4_t src) {
return vduph_lane_s16(src, 2);
// CHECK-LABEL: @test_vduph_lane_s16
- // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %src to [[TYPE:.*]]
- // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
- // CHECK: extractelement <4 x i16> [[TMP2]], i32 2
+ // CHECK: extractelement <4 x i16> %src, i32 2
// CHECK-BE-LABEL: @test_vduph_lane_s16
- // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- // CHECK-BE: [[TMP1:%.*]] = bitcast <4 x i16> [[REV]] to [[TYPE:.*]]
- // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
- // CHECK-BE: extractelement <4 x i16> [[TMP2]], i32 2
+ // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> %src, <4 x i16> %src, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ // CHECK-BE: extractelement <4 x i16> [[REV]], i32 2
}
uint16_t test_vduph_lane_u16(uint16x4_t src) {
return vduph_lane_u16(src, 2);
// CHECK-LABEL: @test_vduph_lane_u16
- // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %src to [[TYPE:.*]]
- // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
- // CHECK: extractelement <4 x i16> [[TMP2]], i32 2
+ // CHECK: extractelement <4 x i16> %src, i32 2
// CHECK-BE-LABEL: @test_vduph_lane_u16
- // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- // CHECK-BE: [[TMP1:%.*]] = bitcast <4 x i16> [[REV]] to [[TYPE:.*]]
- // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16>
- // CHECK-BE: extractelement <4 x i16> [[TMP2]], i32 2
+ // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> %src, <4 x i16> %src, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ // CHECK-BE: extractelement <4 x i16> [[REV]], i32 2
}
int32_t test_vdups_lane_s32(int32x2_t src) {
return vdups_lane_s32(src, 0);
// CHECK-LABEL: @test_vdups_lane_s32
- // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %src to [[TYPE:.*]]
- // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
- // CHECK: extractelement <2 x i32> [[TMP2]], i32 0
+ // CHECK: extractelement <2 x i32> %src, i32 0
// CHECK-BE-LABEL: @test_vdups_lane_s32
- // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32> <i32 1, i32 0>
- // CHECK-BE: [[TMP1:%.*]] = bitcast <2 x i32> [[REV]] to [[TYPE:.*]]
- // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
- // CHECK-BE: extractelement <2 x i32> [[TMP2]], i32 0
+ // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> %src, <2 x i32> %src, <2 x i32> <i32 1, i32 0>
+ // CHECK-BE: extractelement <2 x i32> [[REV]], i32 0
}
uint32_t test_vdups_lane_u32(uint32x2_t src) {
return vdups_lane_u32(src, 0);
// CHECK-LABEL: @test_vdups_lane_u32
- // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %src to [[TYPE:.*]]
- // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
- // CHECK: extractelement <2 x i32> [[TMP2]], i32 0
+ // CHECK: extractelement <2 x i32> %src, i32 0
// CHECK-BE-LABEL: @test_vdups_lane_u32
- // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32> <i32 1, i32 0>
- // CHECK-BE: [[TMP1:%.*]] = bitcast <2 x i32> [[REV]] to [[TYPE:.*]]
- // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32>
- // CHECK-BE: extractelement <2 x i32> [[TMP2]], i32 0
+ // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> %src, <2 x i32> %src, <2 x i32> <i32 1, i32 0>
+ // CHECK-BE: extractelement <2 x i32> [[REV]], i32 0
}
float32_t test_vdups_lane_f32(float32x2_t src) {
return vdups_lane_f32(src, 0);
// CHECK-LABEL: @test_vdups_lane_f32
- // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %src to [[TYPE:.*]]
- // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x float>
- // CHECK: extractelement <2 x float> [[TMP2]], i32 0
+ // CHECK: extractelement <2 x float> %src, i32 0
// CHECK-BE-LABEL: @test_vdups_lane_f32
- // CHECK-BE: [[REV:%.*]] = shufflevector <2 x float> {{.*}}, <2 x i32> <i32 1, i32 0>
- // CHECK-BE: [[TMP1:%.*]] = bitcast <2 x float> [[REV]] to [[TYPE:.*]]
- // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x float>
- // CHECK-BE: extractelement <2 x float> [[TMP2]], i32 0
+ // CHECK-BE: [[REV:%.*]] = shufflevector <2 x float> %src, <2 x float> %src, <2 x i32> <i32 1, i32 0>
+ // CHECK-BE: extractelement <2 x float> [[REV]], i32 0
}
int64_t test_vdupd_lane_s64(int64x1_t src) {
return vdupd_lane_s64(src, 0);
// CHECK-LABEL: @test_vdupd_lane_s64
- // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %src to [[TYPE:.*]]
- // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x i64>
- // CHECK: extractelement <1 x i64> [[TMP2]], i32 0
+ // CHECK: extractelement <1 x i64> %src, i32 0
// CHECK-BE-LABEL: @test_vdupd_lane_s64
- // CHECK-BE: extractelement <1 x i64> {{.*}}, i32 0
+ // CHECK-BE: extractelement <1 x i64> %src, i32 0
}
uint64_t test_vdupd_lane_u64(uint64x1_t src) {
return vdupd_lane_u64(src, 0);
// CHECK-LABEL: @test_vdupd_lane_u64
- // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %src to [[TYPE:.*]]
- // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x i64>
- // CHECK: extractelement <1 x i64> [[TMP2]], i32 0
+ // CHECK: extractelement <1 x i64> %src, i32 0
// CHECK-BE-LABEL: @test_vdupd_lane_u64
- // CHECK-BE: extractelement <1 x i64> {{.*}}, i32 0
+ // CHECK-BE: extractelement <1 x i64> %src, i32 0
}
float64_t test_vdupd_lane_f64(float64x1_t src) {
return vdupd_lane_f64(src, 0);
// CHECK-LABEL: @test_vdupd_lane_f64
- // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %src to [[TYPE:.*]]
- // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x double>
- // CHECK: extractelement <1 x double> [[TMP2]], i32 0
+ // CHECK: extractelement <1 x double> %src, i32 0
// CHECK-BE-LABEL: @test_vdupd_lane_f64
- // CHECK-BE: extractelement <1 x double> {{.*}}, i32 0
+ // CHECK-BE: extractelement <1 x double> %src, i32 0
}
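// NOTE: for one-element vectors the big-endian path needs no reversal -- a
// <1 x i64> or <1 x double> has a single lane, so CHECK-BE expects the same
// direct extractelement as the little-endian run.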
}
// CHECK-LABEL: define <8 x i16> @test_vcopyq_laneq_s16(<8 x i16> %a1, <8 x i16> %a2) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %a1 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[VGETQ_LANE]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a2, i32 7
+// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %a1, i16 [[VGETQ_LANE]], i32 3
// CHECK: ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vcopyq_laneq_s16(int16x8_t a1, int16x8_t a2) {
return vcopyq_laneq_s16(a1, (int64_t) 3, a2, (int64_t) 7);
}
// CHECK-LABEL: define <8 x i16> @test_vcopyq_laneq_u16(<8 x i16> %a1, <8 x i16> %a2) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %a1 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[VGETQ_LANE]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a2, i32 7
+// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %a1, i16 [[VGETQ_LANE]], i32 3
// CHECK: ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vcopyq_laneq_u16(uint16x8_t a1, uint16x8_t a2) {
return vcopyq_laneq_u16(a1, (int64_t) 3, a2, (int64_t) 7);
}
// CHECK-LABEL: define <4 x i32> @test_vcopyq_laneq_s32(<4 x i32> %a1, <4 x i32> %a2) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %a1 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[VGETQ_LANE]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a2, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %a1, i32 [[VGETQ_LANE]], i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vcopyq_laneq_s32(int32x4_t a1, int32x4_t a2) {
return vcopyq_laneq_s32(a1, (int64_t) 3, a2, (int64_t) 3);
}
// CHECK-LABEL: define <4 x i32> @test_vcopyq_laneq_u32(<4 x i32> %a1, <4 x i32> %a2) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %a1 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[VGETQ_LANE]], i32 3
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a2, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %a1, i32 [[VGETQ_LANE]], i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vcopyq_laneq_u32(uint32x4_t a1, uint32x4_t a2) {
return vcopyq_laneq_u32(a1, (int64_t) 3, a2, (int64_t) 3);
}
// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_s64(<2 x i64> %a1, <2 x i64> %a2) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a1 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 0
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a2, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a1, i64 [[VGETQ_LANE]], i32 0
// CHECK: ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vcopyq_laneq_s64(int64x2_t a1, int64x2_t a2) {
return vcopyq_laneq_s64(a1, (int64_t) 0, a2, (int64_t) 1);
}
// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_u64(<2 x i64> %a1, <2 x i64> %a2) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
-// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a1 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 0
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a2, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a1, i64 [[VGETQ_LANE]], i32 0
// CHECK: ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vcopyq_laneq_u64(uint64x2_t a1, uint64x2_t a2) {
return vcopyq_laneq_u64(a1, (int64_t) 0, a2, (int64_t) 1);
}
// CHECK-LABEL: define <4 x float> @test_vcopyq_laneq_f32(<4 x float> %a1, <4 x float> %a2) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
-// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %a1 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP3]], float [[VGETQ_LANE]], i32 0
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a2, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> %a1, float [[VGETQ_LANE]], i32 0
// CHECK: ret <4 x float> [[VSET_LANE]]
float32x4_t test_vcopyq_laneq_f32(float32x4_t a1, float32x4_t a2) {
return vcopyq_laneq_f32(a1, 0, a2, 3);
}
// CHECK-LABEL: define <2 x double> @test_vcopyq_laneq_f64(<2 x double> %a1, <2 x double> %a2) #0 {
-// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a2 to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
-// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
-// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %a1 to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x double> [[TMP3]], double [[VGETQ_LANE]], i32 0
+// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %a2, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x double> %a1, double [[VGETQ_LANE]], i32 0
// CHECK: ret <2 x double> [[VSET_LANE]]
float64x2_t test_vcopyq_laneq_f64(float64x2_t a1, float64x2_t a2) {
return vcopyq_laneq_f64(a1, 0, a2, 1);
}
// CHECK-LABEL: @test_vget_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
return vget_lane_u16(a, 3);
}
// CHECK-LABEL: @test_vget_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
return vget_lane_u32(a, 1);
}
// CHECK-LABEL: @test_vget_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
return vget_lane_s16(a, 3);
}
// CHECK-LABEL: @test_vget_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
return vget_lane_s32(a, 1);
}
// CHECK-LABEL: @test_vget_lane_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
return vget_lane_p16(a, 3);
}
// CHECK-LABEL: @test_vget_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
// CHECK: ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
return vget_lane_f32(a, 1);
// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
-// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
}
// CHECK-LABEL: @test_vgetq_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGET_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
return vgetq_lane_u16(a, 7);
}
// CHECK-LABEL: @test_vgetq_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK: ret i32 [[VGET_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
return vgetq_lane_u32(a, 3);
}
// CHECK-LABEL: @test_vgetq_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGET_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
return vgetq_lane_s16(a, 7);
}
// CHECK-LABEL: @test_vgetq_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK: ret i32 [[VGET_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
return vgetq_lane_s32(a, 3);
}
// CHECK-LABEL: @test_vgetq_lane_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
+// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK: ret i16 [[VGET_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
return vgetq_lane_p16(a, 7);
}
// CHECK-LABEL: @test_vgetq_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> %a, i32 3
// CHECK: ret float [[VGET_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
return vgetq_lane_f32(a, 3);
// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
-// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8>
-// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3
+// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2
// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
}
// CHECK-LABEL: @test_vget_lane_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
return vget_lane_s64(a, 0);
}
// CHECK-LABEL: @test_vget_lane_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
+// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
return vget_lane_u64(a, 0);
}
// CHECK-LABEL: @test_vgetq_lane_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK: ret i64 [[VGET_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
return vgetq_lane_s64(a, 1);
}
// CHECK-LABEL: @test_vgetq_lane_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
+// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK: ret i64 [[VGET_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
return vgetq_lane_u64(a, 1);
}
// CHECK-LABEL: @test_vset_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
return vset_lane_u16(a, b, 3);
}
// CHECK-LABEL: @test_vset_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
return vset_lane_u32(a, b, 1);
}
// CHECK-LABEL: @test_vset_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
return vset_lane_s16(a, b, 3);
}
// CHECK-LABEL: @test_vset_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK: ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
return vset_lane_s32(a, b, 1);
}
// CHECK-LABEL: @test_vset_lane_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK: ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
return vset_lane_p16(a, b, 3);
}
// CHECK-LABEL: @test_vset_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
// CHECK: ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
return vset_lane_f32(a, b, 1);
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
-// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
-// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 1
// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
}
// CHECK-LABEL: @test_vsetq_lane_u16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
+// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
return vsetq_lane_u16(a, b, 7);
}
// CHECK-LABEL: @test_vsetq_lane_u32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
return vsetq_lane_u32(a, b, 3);
}
// CHECK-LABEL: @test_vsetq_lane_s16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
+// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
return vsetq_lane_s16(a, b, 7);
}
// CHECK-LABEL: @test_vsetq_lane_s32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK: ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
return vsetq_lane_s32(a, b, 3);
}
// CHECK-LABEL: @test_vsetq_lane_p16(
-// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
+// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK: ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
return vsetq_lane_p16(a, b, 7);
}
// CHECK-LABEL: @test_vsetq_lane_f32(
-// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
// CHECK: ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
return vsetq_lane_f32(a, b, 3);
// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
-// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
-// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 3
+// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 3
// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
}
// CHECK-LABEL: @test_vset_lane_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
return vset_lane_s64(a, b, 0);
}
// CHECK-LABEL: @test_vset_lane_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0
+// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK: ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
return vset_lane_u64(a, b, 0);
}
// CHECK-LABEL: @test_vsetq_lane_s64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
return vsetq_lane_s64(a, b, 1);
}
// CHECK-LABEL: @test_vsetq_lane_u64(
-// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
-// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
-// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
+// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK: ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
return vsetq_lane_u64(a, b, 1);
// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++ --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++14 -xc++ %s
// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++ --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++17 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc -flax-vector-conversions=none %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc -flax-vector-conversions=none %s
+
#include <arm_neon.h>
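// NOTE: the added RUN lines rebuild the test with
// -flax-vector-conversions=none, under which implicit conversions between
// distinct vector types are hard errors, so they check that arm_neon.h and
// these tests compile without relying on lax vector conversions.  A minimal
// illustration of what that mode rejects (sketch, not part of the test):
//
//   int16x4_t x = vdup_n_s16(0);
//   uint16x4_t y = x;                        // error under =none
//   uint16x4_t z = vreinterpret_u16_s16(x);  // OK: explicit reinterpret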
default: llvm_unreachable("Unhandled case!");
}
- if (isChar() && !Pointer)
+ if (isChar() && !Pointer && Signed)
// Make chars explicitly signed.
S = "S" + S;
else if (isInteger() && !Pointer && !Signed)
}
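// NOTE: with the added Signed check the emitter prefixes the mangled type
// string with "S" only when the char element type is explicitly signed;
// plain char no longer gets the "S" marker and is presumably handled like
// the other unsigned-or-plain integer cases on the else-if path.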
// Check if an explicit cast is needed.
- if (CastToType.isVector()) {
+ if (CastToType.isVector() && LocalCK == ClassB) {
CastToType.makeInteger(8, true);
Arg = "(" + CastToType.str() + ")" + Arg;
}
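// NOTE: this is the emitter-side source of the test changes above.
// Previously every vector argument was cast to a vector of i8 before being
// passed to the builtin, which is where the old bitcast-to-bytes-and-back
// pairs in the CHECK lines came from.  Restricting the cast to ClassB
// intrinsics -- presumably those whose builtins genuinely take plain byte
// vectors -- lets lane get/set and friends pass their operands through
// unchanged.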