From: David Majnemer Date: Tue, 26 Jul 2016 15:21:18 +0000 (+0000) Subject: Update for LLVM changes X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bf5e8c5b38fa3d55fc027edd56b1ca72acf303fe;p=clang Update for LLVM changes InstSimplify has gained the ability to remove needless bitcasts which perturbed some clang codegen tests. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@276756 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/arm_neon_intrinsics.c b/test/CodeGen/arm_neon_intrinsics.c index 4cc7eedffd..ad8587b0db 100644 --- a/test/CodeGen/arm_neon_intrinsics.c +++ b/test/CodeGen/arm_neon_intrinsics.c @@ -6,7 +6,7 @@ #include -// CHECK-LABEL: define <8 x i8> @test_vaba_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vaba_s8( // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]] // CHECK: ret <8 x i8> [[ADD_I]] @@ -14,35 +14,29 @@ int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) { return vaba_s8(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vaba_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vaba_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16> -// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[TMP2]] +// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]] // CHECK: ret <4 x i16> [[ADD_I]] int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) { return vaba_s16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vaba_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vaba_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32> -// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[TMP2]] +// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]] // CHECK: ret <2 x i32> [[ADD_I]] int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) { return vaba_s32(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vaba_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vaba_u8( // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]] // CHECK: ret <8 x i8> [[ADD_I]] @@ -50,35 +44,29 @@ uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { return vaba_u8(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vaba_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vaba_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16> -// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[TMP2]] +// CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]] // CHECK: ret <4 x i16> [[ADD_I]] uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { return vaba_u16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vaba_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vaba_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32> -// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[TMP2]] +// CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]] // CHECK: ret <2 x i32> [[ADD_I]] uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { return vaba_u32(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vabaq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vabaq_s8( // CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c) #4 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]] // CHECK: ret <16 x i8> [[ADD_I]] @@ -86,35 +74,29 @@ int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) { return vabaq_s8(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vabaq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vabaq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8> -// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> [[VABDQ_V_I_I]], <8 x i16> [[VABDQ_V1_I_I]]) #4 +// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %b, <8 x i16> %c) #4 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <8 x i16> -// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP2]] +// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { return vabaq_s16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vabaq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { +// CHECK-LABEL: @test_vabaq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8> -// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> [[VABDQ_V_I_I]], <4 x i32> [[VABDQ_V1_I_I]]) #4 +// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %b, <4 x i32> %c) #4 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <4 x i32> -// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]] +// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { return vabaq_s32(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vabaq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vabaq_u8( // CHECK: [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c) #4 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]] // CHECK: ret <16 x i8> [[ADD_I]] @@ -122,36 +104,29 @@ uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { return vabaq_u8(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vabaq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vabaq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8> -// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> [[VABDQ_V_I_I]], <8 x i16> [[VABDQ_V1_I_I]]) #4 +// CHECK: [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %b, <8 x i16> %c) #4 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <8 x i16> -// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP2]] +// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { return vabaq_u16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vabaq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { +// CHECK-LABEL: @test_vabaq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8> -// CHECK: [[VABDQ_V_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VABDQ_V1_I_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> [[VABDQ_V_I_I]], <4 x i32> [[VABDQ_V1_I_I]]) #4 +// CHECK: [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %b, <4 x i32> %c) #4 // CHECK: [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I_I]] to <4 x i32> -// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP2]] +// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { return vabaq_u32(a, b, c); } - -// CHECK-LABEL: define <8 x i16> @test_vabal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vabal_s8( // CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] @@ -160,41 +135,33 @@ int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { return vabal_s8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vabal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vabal_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I_I]], <4 x i16> [[VABD_V1_I_I_I]]) #4 +// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <4 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> +// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8> +// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vabal_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vabal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vabal_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I_I]], <2 x i32> [[VABD_V1_I_I_I]]) #4 +// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <2 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> +// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8> +// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vabal_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vabal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vabal_u8( // CHECK: [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] @@ -203,201 +170,161 @@ uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { return vabal_u8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vabal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vabal_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I_I]], <4 x i16> [[VABD_V1_I_I_I]]) #4 +// CHECK: [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <4 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> +// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8> +// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { return vabal_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vabal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vabal_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VABD_V_I_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD_V1_I_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I_I]], <2 x i32> [[VABD_V1_I_I_I]]) #4 +// CHECK: [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I_I]] to <2 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> +// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8> +// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { return vabal_u32(a, b, c); } - -// CHECK-LABEL: define <8 x i8> @test_vabd_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vabd_s8( // CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VABD_V_I]] int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) { return vabd_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vabd_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vabd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]]) #4 +// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VABD_V2_I]] int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) { return vabd_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vabd_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vabd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]]) #4 +// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VABD_V2_I]] int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) { return vabd_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vabd_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vabd_u8( // CHECK: [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VABD_V_I]] uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) { return vabd_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vabd_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vabd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I]], <4 x i16> [[VABD_V1_I]]) #4 +// CHECK: [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VABD_V2_I]] uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) { return vabd_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vabd_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vabd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I]], <2 x i32> [[VABD_V1_I]]) #4 +// CHECK: [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VABD_V2_I]] uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) { return vabd_u32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vabd_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vabd_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VABD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VABD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> [[VABD_V_I]], <2 x float> [[VABD_V1_I]]) #4 +// CHECK: [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: ret <2 x float> [[VABD_V2_I]] float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) { return vabd_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vabdq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vabdq_s8( // CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VABDQ_V_I]] int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) { return vabdq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vabdq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vabdq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]]) #4 +// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VABDQ_V2_I]] int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) { return vabdq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vabdq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vabdq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]]) #4 +// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VABDQ_V2_I]] int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) { return vabdq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vabdq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vabdq_u8( // CHECK: [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VABDQ_V_I]] uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) { return vabdq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vabdq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vabdq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> [[VABDQ_V_I]], <8 x i16> [[VABDQ_V1_I]]) #4 +// CHECK: [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VABDQ_V2_I]] uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) { return vabdq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vabdq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vabdq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> [[VABDQ_V_I]], <4 x i32> [[VABDQ_V1_I]]) #4 +// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VABDQ_V2_I]] uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) { return vabdq_u32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vabdq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vabdq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VABDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VABDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> [[VABDQ_V_I]], <4 x float> [[VABDQ_V1_I]]) #4 +// CHECK: [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VABDQ_V3_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP2]] +// CHECK: ret <4 x float> [[VABDQ_V2_I]] float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) { return vabdq_f32(a, b); } - -// CHECK-LABEL: define <8 x i16> @test_vabdl_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vabdl_s8( // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I_I]] @@ -405,39 +332,31 @@ int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { return vabdl_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vabdl_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vabdl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> +// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> +// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I_I]] int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { return vabdl_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vabdl_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vabdl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> +// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> +// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I_I]] int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { return vabdl_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vabdl_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vabdl_u8( // CHECK: [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I_I]] @@ -445,241 +364,222 @@ uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { return vabdl_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vabdl_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vabdl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> [[VABD_V_I_I]], <4 x i16> [[VABD_V1_I_I]]) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <4 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> +// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8> +// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I_I]] uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) { return vabdl_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vabdl_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vabdl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VABD_V_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABD_V1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> [[VABD_V_I_I]], <2 x i32> [[VABD_V1_I_I]]) #4 +// CHECK: [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VABD_V3_I_I]] to <2 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> +// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8> +// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I_I]] uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) { return vabdl_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vabs_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vabs_s8( // CHECK: [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a) #4 // CHECK: ret <8 x i8> [[VABS_I]] int8x8_t test_vabs_s8(int8x8_t a) { return vabs_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vabs_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vabs_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> [[VABS_I]]) #4 +// CHECK: [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a) #4 // CHECK: ret <4 x i16> [[VABS1_I]] int16x4_t test_vabs_s16(int16x4_t a) { return vabs_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vabs_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vabs_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> [[VABS_I]]) #4 +// CHECK: [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a) #4 // CHECK: ret <2 x i32> [[VABS1_I]] int32x2_t test_vabs_s32(int32x2_t a) { return vabs_s32(a); } -// CHECK-LABEL: define <2 x float> @test_vabs_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vabs_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[VABS_I]]) #4 +// CHECK: [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a) #4 // CHECK: ret <2 x float> [[VABS1_I]] float32x2_t test_vabs_f32(float32x2_t a) { return vabs_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vabsq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vabsq_s8( // CHECK: [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a) #4 // CHECK: ret <16 x i8> [[VABS_I]] int8x16_t test_vabsq_s8(int8x16_t a) { return vabsq_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vabsq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vabsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> [[VABS_I]]) #4 +// CHECK: [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a) #4 // CHECK: ret <8 x i16> [[VABS1_I]] int16x8_t test_vabsq_s16(int16x8_t a) { return vabsq_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vabsq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vabsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> [[VABS_I]]) #4 +// CHECK: [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a) #4 // CHECK: ret <4 x i32> [[VABS1_I]] int32x4_t test_vabsq_s32(int32x4_t a) { return vabsq_s32(a); } -// CHECK-LABEL: define <4 x float> @test_vabsq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vabsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VABS_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[VABS_I]]) #4 +// CHECK: [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #4 // CHECK: ret <4 x float> [[VABS1_I]] float32x4_t test_vabsq_f32(float32x4_t a) { return vabsq_f32(a); } - -// CHECK-LABEL: define <8 x i8> @test_vadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vadd_s8( // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b // CHECK: ret <8 x i8> [[ADD_I]] int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) { return vadd_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vadd_s16( // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b // CHECK: ret <4 x i16> [[ADD_I]] int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) { return vadd_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vadd_s32( // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b // CHECK: ret <2 x i32> [[ADD_I]] int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) { return vadd_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vadd_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vadd_s64( // CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b // CHECK: ret <1 x i64> [[ADD_I]] int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) { return vadd_s64(a, b); } -// CHECK-LABEL: define <2 x float> @test_vadd_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vadd_f32( // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, %b // CHECK: ret <2 x float> [[ADD_I]] float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) { return vadd_f32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vadd_u8( // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, %b // CHECK: ret <8 x i8> [[ADD_I]] uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) { return vadd_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vadd_u16( // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, %b // CHECK: ret <4 x i16> [[ADD_I]] uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) { return vadd_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vadd_u32( // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, %b // CHECK: ret <2 x i32> [[ADD_I]] uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) { return vadd_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vadd_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vadd_u64( // CHECK: [[ADD_I:%.*]] = add <1 x i64> %a, %b // CHECK: ret <1 x i64> [[ADD_I]] uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) { return vadd_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddq_s8( // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b // CHECK: ret <16 x i8> [[ADD_I]] int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) { return vaddq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddq_s16( // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b // CHECK: ret <8 x i16> [[ADD_I]] int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) { return vaddq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddq_s32( // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) { return vaddq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vaddq_s64( // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) { return vaddq_s64(a, b); } -// CHECK-LABEL: define <4 x float> @test_vaddq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vaddq_f32( // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, %b // CHECK: ret <4 x float> [[ADD_I]] float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) { return vaddq_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddq_u8( // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, %b // CHECK: ret <16 x i8> [[ADD_I]] uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) { return vaddq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddq_u16( // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, %b // CHECK: ret <8 x i16> [[ADD_I]] uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) { return vaddq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddq_u32( // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, %b // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) { return vaddq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vaddq_u64( // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, %b // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) { return vaddq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddhn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]] +// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VADDHN2_I]] @@ -687,12 +587,10 @@ int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { return vaddhn_s16(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddhn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] +// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VADDHN2_I]] @@ -700,12 +598,10 @@ int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { return vaddhn_s32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vaddhn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] +// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VADDHN2_I]] @@ -713,12 +609,10 @@ int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { return vaddhn_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddhn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> [[TMP2]], [[TMP3]] +// CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VADDHN2_I]] @@ -726,12 +620,10 @@ uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { return vaddhn_u16(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddhn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]] +// CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VADDHN2_I]] @@ -739,12 +631,10 @@ uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { return vaddhn_u32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vaddhn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]] +// CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VADDHN2_I]] @@ -752,8 +642,7 @@ uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { return vaddhn_u64(a, b); } - -// CHECK-LABEL: define <8 x i16> @test_vaddl_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddl_s8( // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] @@ -762,33 +651,29 @@ int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) { return vaddl_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddl_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) { return vaddl_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddl_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64> +// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) { return vaddl_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddl_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddl_u8( // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16> // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] @@ -797,34 +682,29 @@ uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) { return vaddl_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddl_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) { return vaddl_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddl_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64> +// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) { return vaddl_u32(a, b); } - -// CHECK-LABEL: define <8 x i16> @test_vaddw_s8(<8 x i16> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddw_s8( // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -832,27 +712,25 @@ int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { return vaddw_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddw_s16(<4 x i32> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddw_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { return vaddw_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddw_s32(<2 x i64> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddw_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { return vaddw_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vaddw_u8(<8 x i16> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vaddw_u8( // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -860,141 +738,137 @@ uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { return vaddw_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vaddw_u16(<4 x i32> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vaddw_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32> // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { return vaddw_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vaddw_u32(<2 x i64> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vaddw_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64> // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) { return vaddw_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vand_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vand_s8( // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b // CHECK: ret <8 x i8> [[AND_I]] int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) { return vand_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vand_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vand_s16( // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b // CHECK: ret <4 x i16> [[AND_I]] int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) { return vand_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vand_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vand_s32( // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b // CHECK: ret <2 x i32> [[AND_I]] int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) { return vand_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vand_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vand_s64( // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b // CHECK: ret <1 x i64> [[AND_I]] int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) { return vand_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vand_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vand_u8( // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, %b // CHECK: ret <8 x i8> [[AND_I]] uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) { return vand_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vand_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vand_u16( // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, %b // CHECK: ret <4 x i16> [[AND_I]] uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) { return vand_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vand_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vand_u32( // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, %b // CHECK: ret <2 x i32> [[AND_I]] uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) { return vand_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vand_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vand_u64( // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, %b // CHECK: ret <1 x i64> [[AND_I]] uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) { return vand_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vandq_s8( // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b // CHECK: ret <16 x i8> [[AND_I]] int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) { return vandq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vandq_s16( // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b // CHECK: ret <8 x i16> [[AND_I]] int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) { return vandq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vandq_s32( // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b // CHECK: ret <4 x i32> [[AND_I]] int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) { return vandq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vandq_s64( // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b // CHECK: ret <2 x i64> [[AND_I]] int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) { return vandq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vandq_u8( // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b // CHECK: ret <16 x i8> [[AND_I]] uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) { return vandq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vandq_u16( // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b // CHECK: ret <8 x i16> [[AND_I]] uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) { return vandq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vandq_u32( // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b // CHECK: ret <4 x i32> [[AND_I]] uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) { return vandq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vandq_u64( // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b // CHECK: ret <2 x i64> [[AND_I]] uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) { return vandq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vbic_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vbic_s8( // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[AND_I]] @@ -1002,7 +876,7 @@ int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { return vbic_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vbic_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vbic_s16( // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[AND_I]] @@ -1010,7 +884,7 @@ int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { return vbic_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vbic_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vbic_s32( // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[AND_I]] @@ -1018,7 +892,7 @@ int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { return vbic_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vbic_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vbic_s64( // CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[AND_I]] @@ -1026,7 +900,7 @@ int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { return vbic_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vbic_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vbic_u8( // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, // CHECK: [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[AND_I]] @@ -1034,7 +908,7 @@ uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { return vbic_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vbic_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vbic_u16( // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, // CHECK: [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[AND_I]] @@ -1042,7 +916,7 @@ uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { return vbic_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vbic_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vbic_u32( // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, // CHECK: [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[AND_I]] @@ -1050,7 +924,7 @@ uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { return vbic_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vbic_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vbic_u64( // CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, // CHECK: [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[AND_I]] @@ -1058,7 +932,7 @@ uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { return vbic_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vbicq_s8( // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[AND_I]] @@ -1066,7 +940,7 @@ int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { return vbicq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vbicq_s16( // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[AND_I]] @@ -1074,7 +948,7 @@ int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { return vbicq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vbicq_s32( // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[AND_I]] @@ -1082,7 +956,7 @@ int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { return vbicq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vbicq_s64( // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[AND_I]] @@ -1090,7 +964,7 @@ int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { return vbicq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vbicq_u8( // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, // CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[AND_I]] @@ -1098,7 +972,7 @@ uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { return vbicq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vbicq_u16( // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, // CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[AND_I]] @@ -1106,7 +980,7 @@ uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { return vbicq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vbicq_u32( // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, // CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[AND_I]] @@ -1114,7 +988,7 @@ uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { return vbicq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vbicq_u64( // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, // CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[AND_I]] @@ -1122,15 +996,14 @@ uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { return vbicq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vbsl_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vbsl_s8( // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 // CHECK: ret <8 x i8> [[VBSL_V_I]] int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) { return vbsl_s8(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vbsl_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vbsl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> @@ -1141,7 +1014,7 @@ int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) { return vbsl_s16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vbsl_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vbsl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> @@ -1152,7 +1025,7 @@ int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) { return vbsl_s32(a, b, c); } -// CHECK-LABEL: define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 { +// CHECK-LABEL: @test_vbsl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8> @@ -1163,14 +1036,14 @@ int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) { return vbsl_s64(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vbsl_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vbsl_u8( // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 // CHECK: ret <8 x i8> [[VBSL_V_I]] uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { return vbsl_u8(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vbsl_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vbsl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> @@ -1181,7 +1054,7 @@ uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { return vbsl_u16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vbsl_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vbsl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> @@ -1192,7 +1065,7 @@ uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { return vbsl_u32(a, b, c); } -// CHECK-LABEL: define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 { +// CHECK-LABEL: @test_vbsl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8> @@ -1203,7 +1076,7 @@ uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) { return vbsl_u64(a, b, c); } -// CHECK-LABEL: define <2 x float> @test_vbsl_f32(<2 x i32> %a, <2 x float> %b, <2 x float> %c) #0 { +// CHECK-LABEL: @test_vbsl_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> @@ -1214,14 +1087,14 @@ float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) { return vbsl_f32(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vbsl_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vbsl_p8( // CHECK: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 // CHECK: ret <8 x i8> [[VBSL_V_I]] poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) { return vbsl_p8(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vbsl_p16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vbsl_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> @@ -1232,14 +1105,14 @@ poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) { return vbsl_p16(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vbslq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vbslq_s8( // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 // CHECK: ret <16 x i8> [[VBSLQ_V_I]] int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) { return vbslq_s8(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vbslq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vbslq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> @@ -1250,7 +1123,7 @@ int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) { return vbslq_s16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vbslq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { +// CHECK-LABEL: @test_vbslq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8> @@ -1261,7 +1134,7 @@ int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) { return vbslq_s32(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 { +// CHECK-LABEL: @test_vbslq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8> @@ -1272,14 +1145,14 @@ int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) { return vbslq_s64(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vbslq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vbslq_u8( // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 // CHECK: ret <16 x i8> [[VBSLQ_V_I]] uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { return vbslq_u8(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vbslq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vbslq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> @@ -1290,7 +1163,7 @@ uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { return vbslq_u16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vbslq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { +// CHECK-LABEL: @test_vbslq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8> @@ -1301,7 +1174,7 @@ uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { return vbslq_u32(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 { +// CHECK-LABEL: @test_vbslq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8> @@ -1312,7 +1185,7 @@ uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) { return vbslq_u64(a, b, c); } -// CHECK-LABEL: define <4 x float> @test_vbslq_f32(<4 x i32> %a, <4 x float> %b, <4 x float> %c) #0 { +// CHECK-LABEL: @test_vbslq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> @@ -1323,14 +1196,14 @@ float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) { return vbslq_f32(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vbslq_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vbslq_p8( // CHECK: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #4 // CHECK: ret <16 x i8> [[VBSLQ_V_I]] poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) { return vbslq_p8(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vbslq_p16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vbslq_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8> @@ -1341,100 +1214,79 @@ poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) { return vbslq_p16(a, b, c); } - -// CHECK-LABEL: define <2 x i32> @test_vcage_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vcage_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VCAGE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCAGE_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> [[VCAGE_V_I]], <2 x float> [[VCAGE_V1_I]]) #4 +// CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: ret <2 x i32> [[VCAGE_V2_I]] uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) { return vcage_f32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcageq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vcageq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VCAGEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCAGEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> [[VCAGEQ_V_I]], <4 x float> [[VCAGEQ_V1_I]]) #4 +// CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]] uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) { return vcageq_f32(a, b); } - -// CHECK-LABEL: define <2 x i32> @test_vcagt_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vcagt_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VCAGT_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCAGT_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> [[VCAGT_V_I]], <2 x float> [[VCAGT_V1_I]]) #4 +// CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: ret <2 x i32> [[VCAGT_V2_I]] uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) { return vcagt_f32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcagtq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vcagtq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VCAGTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCAGTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> [[VCAGTQ_V_I]], <4 x float> [[VCAGTQ_V1_I]]) #4 +// CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]] uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) { return vcagtq_f32(a, b); } - -// CHECK-LABEL: define <2 x i32> @test_vcale_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vcale_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VCALE_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VCALE_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> [[VCALE_V_I]], <2 x float> [[VCALE_V1_I]]) #4 +// CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %b, <2 x float> %a) #4 // CHECK: ret <2 x i32> [[VCALE_V2_I]] uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) { return vcale_f32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcaleq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vcaleq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VCALEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VCALEQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> [[VCALEQ_V_I]], <4 x float> [[VCALEQ_V1_I]]) #4 +// CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %b, <4 x float> %a) #4 // CHECK: ret <4 x i32> [[VCALEQ_V2_I]] uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) { return vcaleq_f32(a, b); } - -// CHECK-LABEL: define <2 x i32> @test_vcalt_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vcalt_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VCALT_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VCALT_V1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> [[VCALT_V_I]], <2 x float> [[VCALT_V1_I]]) #4 +// CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %b, <2 x float> %a) #4 // CHECK: ret <2 x i32> [[VCALT_V2_I]] uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) { return vcalt_f32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcaltq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vcaltq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VCALTQ_V_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VCALTQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> [[VCALTQ_V_I]], <4 x float> [[VCALTQ_V1_I]]) #4 +// CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %b, <4 x float> %a) #4 // CHECK: ret <4 x i32> [[VCALTQ_V2_I]] uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) { return vcaltq_f32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vceq_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vceq_s8( // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1442,7 +1294,7 @@ uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) { return vceq_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vceq_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vceq_s16( // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1450,7 +1302,7 @@ uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) { return vceq_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vceq_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vceq_s32( // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1458,7 +1310,7 @@ uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) { return vceq_s32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vceq_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vceq_f32( // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1466,7 +1318,7 @@ uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) { return vceq_f32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vceq_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vceq_u8( // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1474,7 +1326,7 @@ uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) { return vceq_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vceq_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vceq_u16( // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1482,7 +1334,7 @@ uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) { return vceq_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vceq_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vceq_u32( // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1490,7 +1342,7 @@ uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) { return vceq_u32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vceq_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vceq_p8( // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1498,7 +1350,7 @@ uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) { return vceq_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vceqq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vceqq_s8( // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1506,7 +1358,7 @@ uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) { return vceqq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vceqq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vceqq_s16( // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -1514,7 +1366,7 @@ uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) { return vceqq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vceqq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vceqq_s32( // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1522,7 +1374,7 @@ uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) { return vceqq_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vceqq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vceqq_f32( // CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1530,7 +1382,7 @@ uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) { return vceqq_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vceqq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vceqq_u8( // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1538,7 +1390,7 @@ uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) { return vceqq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vceqq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vceqq_u16( // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -1546,7 +1398,7 @@ uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) { return vceqq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vceqq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vceqq_u32( // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1554,7 +1406,7 @@ uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) { return vceqq_u32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vceqq_p8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vceqq_p8( // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1562,8 +1414,7 @@ uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) { return vceqq_p8(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vcge_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vcge_s8( // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1571,7 +1422,7 @@ uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) { return vcge_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vcge_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vcge_s16( // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1579,7 +1430,7 @@ uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) { return vcge_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vcge_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vcge_s32( // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1587,7 +1438,7 @@ uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) { return vcge_s32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vcge_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vcge_f32( // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1595,7 +1446,7 @@ uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) { return vcge_f32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vcge_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vcge_u8( // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1603,7 +1454,7 @@ uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) { return vcge_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vcge_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vcge_u16( // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1611,7 +1462,7 @@ uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) { return vcge_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vcge_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vcge_u32( // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1619,7 +1470,7 @@ uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) { return vcge_u32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vcgeq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vcgeq_s8( // CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1627,7 +1478,7 @@ uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) { return vcgeq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcgeq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vcgeq_s16( // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -1635,7 +1486,7 @@ uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) { return vcgeq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcgeq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vcgeq_s32( // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1643,7 +1494,7 @@ uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) { return vcgeq_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcgeq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vcgeq_f32( // CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1651,7 +1502,7 @@ uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) { return vcgeq_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vcgeq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vcgeq_u8( // CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1659,7 +1510,7 @@ uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) { return vcgeq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcgeq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vcgeq_u16( // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -1667,7 +1518,7 @@ uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) { return vcgeq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcgeq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vcgeq_u32( // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1675,8 +1526,7 @@ uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) { return vcgeq_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vcgt_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vcgt_s8( // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1684,7 +1534,7 @@ uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) { return vcgt_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vcgt_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vcgt_s16( // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1692,7 +1542,7 @@ uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) { return vcgt_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vcgt_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vcgt_s32( // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1700,7 +1550,7 @@ uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) { return vcgt_s32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vcgt_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vcgt_f32( // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1708,7 +1558,7 @@ uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) { return vcgt_f32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vcgt_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vcgt_u8( // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1716,7 +1566,7 @@ uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) { return vcgt_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vcgt_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vcgt_u16( // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1724,7 +1574,7 @@ uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) { return vcgt_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vcgt_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vcgt_u32( // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1732,7 +1582,7 @@ uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) { return vcgt_u32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vcgtq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vcgtq_s8( // CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1740,7 +1590,7 @@ uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) { return vcgtq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcgtq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vcgtq_s16( // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -1748,7 +1598,7 @@ uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) { return vcgtq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcgtq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vcgtq_s32( // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1756,7 +1606,7 @@ uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) { return vcgtq_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcgtq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vcgtq_f32( // CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1764,7 +1614,7 @@ uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) { return vcgtq_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vcgtq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vcgtq_u8( // CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1772,7 +1622,7 @@ uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) { return vcgtq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcgtq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vcgtq_u16( // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -1780,7 +1630,7 @@ uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) { return vcgtq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcgtq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vcgtq_u32( // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1788,8 +1638,7 @@ uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) { return vcgtq_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vcle_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vcle_s8( // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1797,7 +1646,7 @@ uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) { return vcle_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vcle_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vcle_s16( // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1805,7 +1654,7 @@ uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) { return vcle_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vcle_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vcle_s32( // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1813,7 +1662,7 @@ uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) { return vcle_s32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vcle_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vcle_f32( // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1821,7 +1670,7 @@ uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) { return vcle_f32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vcle_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vcle_u8( // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1829,7 +1678,7 @@ uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) { return vcle_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vcle_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vcle_u16( // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1837,7 +1686,7 @@ uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) { return vcle_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vcle_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vcle_u32( // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1845,7 +1694,7 @@ uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) { return vcle_u32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vcleq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vcleq_s8( // CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1853,7 +1702,7 @@ uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) { return vcleq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcleq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vcleq_s16( // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -1861,7 +1710,7 @@ uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) { return vcleq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcleq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vcleq_s32( // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1869,7 +1718,7 @@ uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) { return vcleq_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcleq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vcleq_f32( // CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1877,7 +1726,7 @@ uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) { return vcleq_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vcleq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vcleq_u8( // CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -1885,7 +1734,7 @@ uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) { return vcleq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcleq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vcleq_u16( // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -1893,7 +1742,7 @@ uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) { return vcleq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcleq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vcleq_u32( // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -1901,67 +1750,57 @@ uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) { return vcleq_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vcls_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vcls_s8( // CHECK: [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a) #4 // CHECK: ret <8 x i8> [[VCLS_V_I]] int8x8_t test_vcls_s8(int8x8_t a) { return vcls_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vcls_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vcls_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> [[VCLS_V_I]]) #4 +// CHECK: [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a) #4 // CHECK: [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VCLS_V1_I]] int16x4_t test_vcls_s16(int16x4_t a) { return vcls_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vcls_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vcls_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> [[VCLS_V_I]]) #4 +// CHECK: [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a) #4 // CHECK: [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLS_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VCLS_V1_I]] int32x2_t test_vcls_s32(int32x2_t a) { return vcls_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vclsq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vclsq_s8( // CHECK: [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a) #4 // CHECK: ret <16 x i8> [[VCLSQ_V_I]] int8x16_t test_vclsq_s8(int8x16_t a) { return vclsq_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vclsq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vclsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> [[VCLSQ_V_I]]) #4 +// CHECK: [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a) #4 // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP1]] +// CHECK: ret <8 x i16> [[VCLSQ_V1_I]] int16x8_t test_vclsq_s16(int16x8_t a) { return vclsq_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vclsq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vclsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> [[VCLSQ_V_I]]) #4 +// CHECK: [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a) #4 // CHECK: [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLSQ_V2_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP1]] +// CHECK: ret <4 x i32> [[VCLSQ_V1_I]] int32x4_t test_vclsq_s32(int32x4_t a) { return vclsq_s32(a); } - -// CHECK-LABEL: define <8 x i8> @test_vclt_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vclt_s8( // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -1969,7 +1808,7 @@ uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) { return vclt_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vclt_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vclt_s16( // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -1977,7 +1816,7 @@ uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) { return vclt_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vclt_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vclt_s32( // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1985,7 +1824,7 @@ uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) { return vclt_s32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vclt_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vclt_f32( // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -1993,7 +1832,7 @@ uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) { return vclt_f32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vclt_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vclt_u8( // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> // CHECK: ret <8 x i8> [[SEXT_I]] @@ -2001,7 +1840,7 @@ uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) { return vclt_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vclt_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vclt_u16( // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> // CHECK: ret <4 x i16> [[SEXT_I]] @@ -2009,7 +1848,7 @@ uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) { return vclt_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vclt_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vclt_u32( // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> // CHECK: ret <2 x i32> [[SEXT_I]] @@ -2017,7 +1856,7 @@ uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) { return vclt_u32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vcltq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vcltq_s8( // CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -2025,7 +1864,7 @@ uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) { return vcltq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcltq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vcltq_s16( // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -2033,7 +1872,7 @@ uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) { return vcltq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcltq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vcltq_s32( // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -2041,7 +1880,7 @@ uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) { return vcltq_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcltq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vcltq_f32( // CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -2049,7 +1888,7 @@ uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) { return vcltq_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vcltq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vcltq_u8( // CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> // CHECK: ret <16 x i8> [[SEXT_I]] @@ -2057,7 +1896,7 @@ uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) { return vcltq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcltq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vcltq_u16( // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> // CHECK: ret <8 x i16> [[SEXT_I]] @@ -2065,7 +1904,7 @@ uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) { return vcltq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcltq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vcltq_u32( // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK: ret <4 x i32> [[SEXT_I]] @@ -2073,253 +1912,233 @@ uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) { return vcltq_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vclz_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vclz_s8( // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 // CHECK: ret <8 x i8> [[VCLZ_V_I]] int8x8_t test_vclz_s8(int8x8_t a) { return vclz_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vclz_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vclz_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VCLZ_V1_I]] int16x4_t test_vclz_s16(int16x4_t a) { return vclz_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vclz_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vclz_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VCLZ_V1_I]] int32x2_t test_vclz_s32(int32x2_t a) { return vclz_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vclz_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vclz_u8( // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false) #4 // CHECK: ret <8 x i8> [[VCLZ_V_I]] uint8x8_t test_vclz_u8(uint8x8_t a) { return vclz_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vclz_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vclz_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false) #4 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VCLZ_V1_I]] uint16x4_t test_vclz_u16(uint16x4_t a) { return vclz_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vclz_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vclz_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) #4 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VCLZ_V1_I]] uint32x2_t test_vclz_u32(uint32x2_t a) { return vclz_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vclzq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vclzq_s8( // CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 // CHECK: ret <16 x i8> [[VCLZQ_V_I]] int8x16_t test_vclzq_s8(int8x16_t a) { return vclzq_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vclzq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vclzq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #4 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP1]] +// CHECK: ret <8 x i16> [[VCLZQ_V1_I]] int16x8_t test_vclzq_s16(int16x8_t a) { return vclzq_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vclzq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vclzq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #4 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP1]] +// CHECK: ret <4 x i32> [[VCLZQ_V1_I]] int32x4_t test_vclzq_s32(int32x4_t a) { return vclzq_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vclzq_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vclzq_u8( // CHECK: [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) #4 // CHECK: ret <16 x i8> [[VCLZQ_V_I]] uint8x16_t test_vclzq_u8(uint8x16_t a) { return vclzq_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vclzq_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vclzq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[VCLZQ_V_I]], i1 false) #4 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) #4 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP1]] +// CHECK: ret <8 x i16> [[VCLZQ_V1_I]] uint16x8_t test_vclzq_u16(uint16x8_t a) { return vclzq_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vclzq_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vclzq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VCLZQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[VCLZQ_V_I]], i1 false) #4 +// CHECK: [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) #4 // CHECK: [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCLZQ_V2_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP1]] +// CHECK: ret <4 x i32> [[VCLZQ_V1_I]] uint32x4_t test_vclzq_u32(uint32x4_t a) { return vclzq_u32(a); } - -// CHECK-LABEL: define <8 x i8> @test_vcnt_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vcnt_u8( // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 // CHECK: ret <8 x i8> [[VCNT_V_I]] uint8x8_t test_vcnt_u8(uint8x8_t a) { return vcnt_u8(a); } -// CHECK-LABEL: define <8 x i8> @test_vcnt_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vcnt_s8( // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 // CHECK: ret <8 x i8> [[VCNT_V_I]] int8x8_t test_vcnt_s8(int8x8_t a) { return vcnt_s8(a); } -// CHECK-LABEL: define <8 x i8> @test_vcnt_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vcnt_p8( // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a) #4 // CHECK: ret <8 x i8> [[VCNT_V_I]] poly8x8_t test_vcnt_p8(poly8x8_t a) { return vcnt_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vcntq_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vcntq_u8( // CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 // CHECK: ret <16 x i8> [[VCNTQ_V_I]] uint8x16_t test_vcntq_u8(uint8x16_t a) { return vcntq_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vcntq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vcntq_s8( // CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 // CHECK: ret <16 x i8> [[VCNTQ_V_I]] int8x16_t test_vcntq_s8(int8x16_t a) { return vcntq_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vcntq_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vcntq_p8( // CHECK: [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) #4 // CHECK: ret <16 x i8> [[VCNTQ_V_I]] poly8x16_t test_vcntq_p8(poly8x16_t a) { return vcntq_p8(a); } - -// CHECK-LABEL: define <16 x i8> @test_vcombine_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vcombine_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) { return vcombine_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcombine_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vcombine_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) { return vcombine_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcombine_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vcombine_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) { return vcombine_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vcombine_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vcombine_s64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) { return vcombine_s64(a, b); } -// CHECK-LABEL: define <8 x half> @test_vcombine_f16(<4 x half> %a, <4 x half> %b) #0 { +// CHECK-LABEL: @test_vcombine_f16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> // CHECK: ret <8 x half> [[SHUFFLE_I]] float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) { return vcombine_f16(a, b); } -// CHECK-LABEL: define <4 x float> @test_vcombine_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vcombine_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> // CHECK: ret <4 x float> [[SHUFFLE_I]] float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) { return vcombine_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vcombine_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vcombine_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) { return vcombine_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcombine_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vcombine_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) { return vcombine_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vcombine_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vcombine_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) { return vcombine_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vcombine_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vcombine_u64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> // CHECK: ret <2 x i64> [[SHUFFLE_I]] uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) { return vcombine_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vcombine_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vcombine_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) { return vcombine_p8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vcombine_p16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vcombine_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) { return vcombine_p16(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vcreate_s8(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_s8( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4 // CHECK: ret <8 x i8> [[VCLZ_V_I]] @@ -2327,45 +2146,41 @@ int8x8_t test_vcreate_s8(uint64_t a) { return vclz_s8(vcreate_s8(a)); } -// CHECK-LABEL: define <4 x i16> @test_vcreate_s16(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_s16( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false) #4 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VCLZ_V1_I]] int16x4_t test_vcreate_s16(uint64_t a) { return vclz_s16(vcreate_s16(a)); } -// CHECK-LABEL: define <2 x i32> @test_vcreate_s32(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_s32( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false) #4 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VCLZ_V1_I]] int32x2_t test_vcreate_s32(uint64_t a) { return vclz_s32(vcreate_s32(a)); } -// CHECK-LABEL: define <4 x half> @test_vcreate_f16(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_f16( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vcreate_f16(uint64_t a) { return vcreate_f16(a); } -// CHECK-LABEL: define <2 x float> @test_vcreate_f32(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_f32( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vcreate_f32(uint64_t a) { return vcreate_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vcreate_u8(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_u8( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> // CHECK: [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false) #4 // CHECK: ret <8 x i8> [[VCLZ_V_I]] @@ -2373,45 +2188,36 @@ uint8x8_t test_vcreate_u8(uint64_t a) { return vclz_s8(vcreate_u8(a)); } -// CHECK-LABEL: define <4 x i16> @test_vcreate_u16(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_u16( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[VCLZ_V_I]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false) #4 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VCLZ_V1_I]] uint16x4_t test_vcreate_u16(uint64_t a) { return vclz_s16(vcreate_u16(a)); } -// CHECK-LABEL: define <2 x i32> @test_vcreate_u32(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_u32( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <2 x i32> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK: [[VCLZ_V_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[VCLZ_V_I]], i1 false) #4 +// CHECK: [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false) #4 // CHECK: [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VCLZ_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VCLZ_V1_I]] uint32x2_t test_vcreate_u32(uint64_t a) { return vclz_s32(vcreate_u32(a)); } - -// We have two ways of lowering that. Either with one 'vmov d, r, r' or -// with two 'vmov d[],r'. LLVM does the latter. We may want to be less -// strict about the matching pattern if it starts causing problem. -// CHECK-LABEL: define <1 x i64> @test_vcreate_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_u64( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64> // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]] // CHECK: ret <1 x i64> [[ADD_I]] uint64x1_t test_vcreate_u64(uint64_t a) { uint64x1_t tmp = vcreate_u64(a); return vadd_u64(tmp, tmp); - } -// CHECK-LABEL: define <8 x i8> @test_vcreate_p8(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_p8( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <8 x i8> // CHECK: [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]]) #4 // CHECK: ret <8 x i8> [[VCNT_V_I]] @@ -2419,7 +2225,7 @@ poly8x8_t test_vcreate_p8(uint64_t a) { return vcnt_p8(vcreate_p8(a)); } -// CHECK-LABEL: define <4 x i16> @test_vcreate_p16(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_p16( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <4 x i16> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> @@ -2432,7 +2238,7 @@ poly16x4_t test_vcreate_p16(uint64_t a) { return vbsl_p16(tmp, tmp, tmp); } -// CHECK-LABEL: define <1 x i64> @test_vcreate_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vcreate_s64( // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64> // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]] // CHECK: ret <1 x i64> [[ADD_I]] @@ -2441,11 +2247,9 @@ int64x1_t test_vcreate_s64(uint64_t a) { return vadd_s64(tmp, tmp); } - -// CHECK-LABEL: define <4 x half> @test_vcvt_f16_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcvt_f16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VCVT_F16_F32_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> [[VCVT_F16_F32_I]]) #4 +// CHECK: [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a) #4 // CHECK: [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half> // CHECK: ret <4 x half> [[TMP1]] @@ -2453,57 +2257,49 @@ float16x4_t test_vcvt_f16_f32(float32x4_t a) { return vcvt_f16_f32(a); } - -// CHECK-LABEL: define <2 x float> @test_vcvt_f32_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvt_f32_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> +// CHECK: [[VCVT_I:%.*]] = sitofp <2 x i32> %a to <2 x float> // CHECK: ret <2 x float> [[VCVT_I]] float32x2_t test_vcvt_f32_s32(int32x2_t a) { return vcvt_f32_s32(a); } -// CHECK-LABEL: define <2 x float> @test_vcvt_f32_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvt_f32_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> [[TMP1]] to <2 x float> +// CHECK: [[VCVT_I:%.*]] = uitofp <2 x i32> %a to <2 x float> // CHECK: ret <2 x float> [[VCVT_I]] float32x2_t test_vcvt_f32_u32(uint32x2_t a) { return vcvt_f32_u32(a); } -// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvtq_f32_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> [[TMP1]] to <4 x float> +// CHECK: [[VCVT_I:%.*]] = sitofp <4 x i32> %a to <4 x float> // CHECK: ret <4 x float> [[VCVT_I]] float32x4_t test_vcvtq_f32_s32(int32x4_t a) { return vcvtq_f32_s32(a); } -// CHECK-LABEL: define <4 x float> @test_vcvtq_f32_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvtq_f32_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x float> +// CHECK: [[VCVT_I:%.*]] = uitofp <4 x i32> %a to <4 x float> // CHECK: ret <4 x float> [[VCVT_I]] float32x4_t test_vcvtq_f32_u32(uint32x4_t a) { return vcvtq_f32_u32(a); } - -// CHECK-LABEL: define <4 x float> @test_vcvt_f32_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vcvt_f32_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> // CHECK: [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]]) #4 // CHECK: [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VCVT_F32_F162_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP1]] +// CHECK: ret <4 x float> [[VCVT_F32_F161_I]] float32x4_t test_vcvt_f32_f16(float16x4_t a) { return vcvt_f32_f16(a); } - -// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvt_n_f32_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1) @@ -2512,7 +2308,7 @@ float32x2_t test_vcvt_n_f32_s32(int32x2_t a) { return vcvt_n_f32_s32(a, 1); } -// CHECK-LABEL: define <2 x float> @test_vcvt_n_f32_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvt_n_f32_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1) @@ -2521,7 +2317,7 @@ float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) { return vcvt_n_f32_u32(a, 1); } -// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_f32_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3) @@ -2530,7 +2326,7 @@ float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) { return vcvtq_n_f32_s32(a, 3); } -// CHECK-LABEL: define <4 x float> @test_vcvtq_n_f32_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_f32_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3) @@ -2539,8 +2335,7 @@ float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { return vcvtq_n_f32_u32(a, 3); } - -// CHECK-LABEL: define <2 x i32> @test_vcvt_n_s32_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vcvt_n_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1) @@ -2549,7 +2344,7 @@ int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { return vcvt_n_s32_f32(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_s32_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3) @@ -2558,8 +2353,7 @@ int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { return vcvtq_n_s32_f32(a, 3); } - -// CHECK-LABEL: define <2 x i32> @test_vcvt_n_u32_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vcvt_n_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1) @@ -2568,7 +2362,7 @@ uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) { return vcvt_n_u32_f32(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vcvtq_n_u32_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcvtq_n_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3) @@ -2577,201 +2371,193 @@ uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) { return vcvtq_n_u32_f32(a, 3); } - -// CHECK-LABEL: define <2 x i32> @test_vcvt_s32_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vcvt_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCVT_I:%.*]] = fptosi <2 x float> [[TMP1]] to <2 x i32> +// CHECK: [[VCVT_I:%.*]] = fptosi <2 x float> %a to <2 x i32> // CHECK: ret <2 x i32> [[VCVT_I]] int32x2_t test_vcvt_s32_f32(float32x2_t a) { return vcvt_s32_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vcvtq_s32_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcvtq_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVT_I:%.*]] = fptosi <4 x float> [[TMP1]] to <4 x i32> +// CHECK: [[VCVT_I:%.*]] = fptosi <4 x float> %a to <4 x i32> // CHECK: ret <4 x i32> [[VCVT_I]] int32x4_t test_vcvtq_s32_f32(float32x4_t a) { return vcvtq_s32_f32(a); } - -// CHECK-LABEL: define <2 x i32> @test_vcvt_u32_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vcvt_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VCVT_I:%.*]] = fptoui <2 x float> [[TMP1]] to <2 x i32> +// CHECK: [[VCVT_I:%.*]] = fptoui <2 x float> %a to <2 x i32> // CHECK: ret <2 x i32> [[VCVT_I]] uint32x2_t test_vcvt_u32_f32(float32x2_t a) { return vcvt_u32_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vcvtq_u32_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vcvtq_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VCVT_I:%.*]] = fptoui <4 x float> [[TMP1]] to <4 x i32> +// CHECK: [[VCVT_I:%.*]] = fptoui <4 x float> %a to <4 x i32> // CHECK: ret <4 x i32> [[VCVT_I]] uint32x4_t test_vcvtq_u32_f32(float32x4_t a) { return vcvtq_u32_f32(a); } - -// CHECK-LABEL: define <8 x i8> @test_vdup_lane_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_u8( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE]] uint8x8_t test_vdup_lane_u8(uint8x8_t a) { return vdup_lane_u8(a, 7); } -// CHECK-LABEL: define <4 x i16> @test_vdup_lane_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE]] uint16x4_t test_vdup_lane_u16(uint16x4_t a) { return vdup_lane_u16(a, 3); } -// CHECK-LABEL: define <2 x i32> @test_vdup_lane_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE]] uint32x2_t test_vdup_lane_u32(uint32x2_t a) { return vdup_lane_u32(a, 1); } -// CHECK-LABEL: define <8 x i8> @test_vdup_lane_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_s8( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE]] int8x8_t test_vdup_lane_s8(int8x8_t a) { return vdup_lane_s8(a, 7); } -// CHECK-LABEL: define <4 x i16> @test_vdup_lane_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE]] int16x4_t test_vdup_lane_s16(int16x4_t a) { return vdup_lane_s16(a, 3); } -// CHECK-LABEL: define <2 x i32> @test_vdup_lane_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE]] int32x2_t test_vdup_lane_s32(int32x2_t a) { return vdup_lane_s32(a, 1); } -// CHECK-LABEL: define <8 x i8> @test_vdup_lane_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_p8( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE]] poly8x8_t test_vdup_lane_p8(poly8x8_t a) { return vdup_lane_p8(a, 7); } -// CHECK-LABEL: define <4 x i16> @test_vdup_lane_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_p16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE]] poly16x4_t test_vdup_lane_p16(poly16x4_t a) { return vdup_lane_p16(a, 3); } -// CHECK-LABEL: define <2 x float> @test_vdup_lane_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_f32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> // CHECK: ret <2 x float> [[SHUFFLE]] float32x2_t test_vdup_lane_f32(float32x2_t a) { return vdup_lane_f32(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_u8( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE]] uint8x16_t test_vdupq_lane_u8(uint8x8_t a) { return vdupq_lane_u8(a, 7); } -// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE]] uint16x8_t test_vdupq_lane_u16(uint16x4_t a) { return vdupq_lane_u16(a, 3); } -// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE]] uint32x4_t test_vdupq_lane_u32(uint32x2_t a) { return vdupq_lane_u32(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_s8( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE]] int8x16_t test_vdupq_lane_s8(int8x8_t a) { return vdupq_lane_s8(a, 7); } -// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE]] int16x8_t test_vdupq_lane_s16(int16x4_t a) { return vdupq_lane_s16(a, 3); } -// CHECK-LABEL: define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE]] int32x4_t test_vdupq_lane_s32(int32x2_t a) { return vdupq_lane_s32(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vdupq_lane_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_p8( // CHECK: [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE]] poly8x16_t test_vdupq_lane_p8(poly8x8_t a) { return vdupq_lane_p8(a, 7); } -// CHECK-LABEL: define <8 x i16> @test_vdupq_lane_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_p16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE]] poly16x8_t test_vdupq_lane_p16(poly16x4_t a) { return vdupq_lane_p16(a, 3); } -// CHECK-LABEL: define <4 x float> @test_vdupq_lane_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_f32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <4 x i32> // CHECK: ret <4 x float> [[SHUFFLE]] float32x4_t test_vdupq_lane_f32(float32x2_t a) { return vdupq_lane_f32(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vdup_lane_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_s64( // CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <1 x i32> zeroinitializer // CHECK: ret <1 x i64> [[SHUFFLE]] int64x1_t test_vdup_lane_s64(int64x1_t a) { return vdup_lane_s64(a, 0); } -// CHECK-LABEL: define <1 x i64> @test_vdup_lane_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vdup_lane_u64( // CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <1 x i32> zeroinitializer // CHECK: ret <1 x i64> [[SHUFFLE]] uint64x1_t test_vdup_lane_u64(uint64x1_t a) { return vdup_lane_u64(a, 0); } -// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_s64( // CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <2 x i32> zeroinitializer // CHECK: ret <2 x i64> [[SHUFFLE]] int64x2_t test_vdupq_lane_s64(int64x1_t a) { return vdupq_lane_s64(a, 0); } -// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vdupq_lane_u64( // CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %a, <2 x i32> zeroinitializer // CHECK: ret <2 x i64> [[SHUFFLE]] uint64x2_t test_vdupq_lane_u64(uint64x1_t a) { return vdupq_lane_u64(a, 0); } - -// CHECK-LABEL: define <8 x i8> @test_vdup_n_u8(i8 zeroext %a) #0 { +// CHECK-LABEL: @test_vdup_n_u8( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -2785,7 +2571,7 @@ uint8x8_t test_vdup_n_u8(uint8_t a) { return vdup_n_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vdup_n_u16(i16 zeroext %a) #0 { +// CHECK-LABEL: @test_vdup_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -2795,7 +2581,7 @@ uint16x4_t test_vdup_n_u16(uint16_t a) { return vdup_n_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vdup_n_u32(i32 %a) #0 { +// CHECK-LABEL: @test_vdup_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1 // CHECK: ret <2 x i32> [[VECINIT1_I]] @@ -2803,7 +2589,7 @@ uint32x2_t test_vdup_n_u32(uint32_t a) { return vdup_n_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vdup_n_s8(i8 signext %a) #0 { +// CHECK-LABEL: @test_vdup_n_s8( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -2817,7 +2603,7 @@ int8x8_t test_vdup_n_s8(int8_t a) { return vdup_n_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vdup_n_s16(i16 signext %a) #0 { +// CHECK-LABEL: @test_vdup_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -2827,7 +2613,7 @@ int16x4_t test_vdup_n_s16(int16_t a) { return vdup_n_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vdup_n_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vdup_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1 // CHECK: ret <2 x i32> [[VECINIT1_I]] @@ -2835,7 +2621,7 @@ int32x2_t test_vdup_n_s32(int32_t a) { return vdup_n_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vdup_n_p8(i8 signext %a) #0 { +// CHECK-LABEL: @test_vdup_n_p8( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -2849,7 +2635,7 @@ poly8x8_t test_vdup_n_p8(poly8_t a) { return vdup_n_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vdup_n_p16(i16 signext %a) #0 { +// CHECK-LABEL: @test_vdup_n_p16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -2859,7 +2645,7 @@ poly16x4_t test_vdup_n_p16(poly16_t a) { return vdup_n_p16(a); } -// CHECK-LABEL: define <4 x half> @test_vdup_n_f16(half* %a) #0 { +// CHECK-LABEL: @test_vdup_n_f16( // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 // CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0 // CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1 @@ -2870,7 +2656,7 @@ float16x4_t test_vdup_n_f16(float16_t *a) { return vdup_n_f16(*a); } -// CHECK-LABEL: define <2 x float> @test_vdup_n_f32(float %a) #0 { +// CHECK-LABEL: @test_vdup_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1 // CHECK: ret <2 x float> [[VECINIT1_I]] @@ -2878,7 +2664,7 @@ float32x2_t test_vdup_n_f32(float32_t a) { return vdup_n_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vdupq_n_u8(i8 zeroext %a) #0 { +// CHECK-LABEL: @test_vdupq_n_u8( // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -2900,7 +2686,7 @@ uint8x16_t test_vdupq_n_u8(uint8_t a) { return vdupq_n_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vdupq_n_u16(i16 zeroext %a) #0 { +// CHECK-LABEL: @test_vdupq_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -2914,7 +2700,7 @@ uint16x8_t test_vdupq_n_u16(uint16_t a) { return vdupq_n_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vdupq_n_u32(i32 %a) #0 { +// CHECK-LABEL: @test_vdupq_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2 @@ -2924,7 +2710,7 @@ uint32x4_t test_vdupq_n_u32(uint32_t a) { return vdupq_n_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vdupq_n_s8(i8 signext %a) #0 { +// CHECK-LABEL: @test_vdupq_n_s8( // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -2946,7 +2732,7 @@ int8x16_t test_vdupq_n_s8(int8_t a) { return vdupq_n_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vdupq_n_s16(i16 signext %a) #0 { +// CHECK-LABEL: @test_vdupq_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -2960,7 +2746,7 @@ int16x8_t test_vdupq_n_s16(int16_t a) { return vdupq_n_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vdupq_n_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vdupq_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2 @@ -2970,7 +2756,7 @@ int32x4_t test_vdupq_n_s32(int32_t a) { return vdupq_n_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vdupq_n_p8(i8 signext %a) #0 { +// CHECK-LABEL: @test_vdupq_n_p8( // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -2992,7 +2778,7 @@ poly8x16_t test_vdupq_n_p8(poly8_t a) { return vdupq_n_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vdupq_n_p16(i16 signext %a) #0 { +// CHECK-LABEL: @test_vdupq_n_p16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -3006,7 +2792,7 @@ poly16x8_t test_vdupq_n_p16(poly16_t a) { return vdupq_n_p16(a); } -// CHECK-LABEL: define <8 x half> @test_vdupq_n_f16(half* %a) #0 { +// CHECK-LABEL: @test_vdupq_n_f16( // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 // CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0 // CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1 @@ -3021,7 +2807,7 @@ float16x8_t test_vdupq_n_f16(float16_t *a) { return vdupq_n_f16(*a); } -// CHECK-LABEL: define <4 x float> @test_vdupq_n_f32(float %a) #0 { +// CHECK-LABEL: @test_vdupq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2 @@ -3031,7 +2817,7 @@ float32x4_t test_vdupq_n_f32(float32_t a) { return vdupq_n_f32(a); } -// CHECK-LABEL: define <1 x i64> @test_vdup_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vdup_n_s64( // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]] // CHECK: ret <1 x i64> [[ADD_I]] @@ -3040,17 +2826,16 @@ int64x1_t test_vdup_n_s64(int64_t a) { return vadd_s64(tmp, tmp); } -// CHECK-LABEL: define <1 x i64> @test_vdup_n_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vdup_n_u64( // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]] // CHECK: ret <1 x i64> [[ADD_I]] uint64x1_t test_vdup_n_u64(uint64_t a) { int64x1_t tmp = vdup_n_u64(a); return vadd_s64(tmp, tmp); - } -// CHECK-LABEL: define <2 x i64> @test_vdupq_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vdupq_n_s64( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]] @@ -3060,7 +2845,7 @@ int64x2_t test_vdupq_n_s64(int64_t a) { return vaddq_s64(tmp, tmp); } -// CHECK-LABEL: define <2 x i64> @test_vdupq_n_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vdupq_n_u64( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]] @@ -3070,142 +2855,140 @@ uint64x2_t test_vdupq_n_u64(uint64_t a) { return vaddq_u64(tmp, tmp); } - -// CHECK-LABEL: define <8 x i8> @test_veor_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_veor_s8( // CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b // CHECK: ret <8 x i8> [[XOR_I]] int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) { return veor_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_veor_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_veor_s16( // CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b // CHECK: ret <4 x i16> [[XOR_I]] int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) { return veor_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_veor_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_veor_s32( // CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b // CHECK: ret <2 x i32> [[XOR_I]] int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) { return veor_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_veor_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_veor_s64( // CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b // CHECK: ret <1 x i64> [[XOR_I]] int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) { return veor_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_veor_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_veor_u8( // CHECK: [[XOR_I:%.*]] = xor <8 x i8> %a, %b // CHECK: ret <8 x i8> [[XOR_I]] uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) { return veor_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_veor_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_veor_u16( // CHECK: [[XOR_I:%.*]] = xor <4 x i16> %a, %b // CHECK: ret <4 x i16> [[XOR_I]] uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) { return veor_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_veor_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_veor_u32( // CHECK: [[XOR_I:%.*]] = xor <2 x i32> %a, %b // CHECK: ret <2 x i32> [[XOR_I]] uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) { return veor_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_veor_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_veor_u64( // CHECK: [[XOR_I:%.*]] = xor <1 x i64> %a, %b // CHECK: ret <1 x i64> [[XOR_I]] uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) { return veor_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_veorq_s8( // CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b // CHECK: ret <16 x i8> [[XOR_I]] int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) { return veorq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_veorq_s16( // CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b // CHECK: ret <8 x i16> [[XOR_I]] int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) { return veorq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_veorq_s32( // CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b // CHECK: ret <4 x i32> [[XOR_I]] int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) { return veorq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_veorq_s64( // CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b // CHECK: ret <2 x i64> [[XOR_I]] int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) { return veorq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_veorq_u8( // CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b // CHECK: ret <16 x i8> [[XOR_I]] uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) { return veorq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_veorq_u16( // CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b // CHECK: ret <8 x i16> [[XOR_I]] uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) { return veorq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_veorq_u32( // CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b // CHECK: ret <4 x i32> [[XOR_I]] uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) { return veorq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_veorq_u64( // CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b // CHECK: ret <2 x i64> [[XOR_I]] uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) { return veorq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vext_s8( // CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: ret <8 x i8> [[VEXT]] int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) { return vext_s8(a, b, 7); } -// CHECK-LABEL: define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vext_u8( // CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: ret <8 x i8> [[VEXT]] uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) { return vext_u8(a, b, 7); } -// CHECK-LABEL: define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vext_p8( // CHECK: [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> // CHECK: ret <8 x i8> [[VEXT]] poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) { return vext_p8(a, b, 7); } -// CHECK-LABEL: define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vext_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -3216,7 +2999,7 @@ int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) { return vext_s16(a, b, 3); } -// CHECK-LABEL: define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vext_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -3227,7 +3010,7 @@ uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) { return vext_u16(a, b, 3); } -// CHECK-LABEL: define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vext_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -3238,7 +3021,7 @@ poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) { return vext_p16(a, b, 3); } -// CHECK-LABEL: define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vext_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -3249,7 +3032,7 @@ int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) { return vext_s32(a, b, 1); } -// CHECK-LABEL: define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vext_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -3260,7 +3043,7 @@ uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) { return vext_u32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vext_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -3271,7 +3054,7 @@ int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) { return vext_s64(a, b, 0); } -// CHECK-LABEL: define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vext_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -3282,7 +3065,7 @@ uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) { return vext_u64(a, b, 0); } -// CHECK-LABEL: define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vext_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> @@ -3293,28 +3076,28 @@ float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) { return vext_f32(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vextq_s8( // CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: ret <16 x i8> [[VEXT]] int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) { return vextq_s8(a, b, 15); } -// CHECK-LABEL: define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vextq_u8( // CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: ret <16 x i8> [[VEXT]] uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) { return vextq_u8(a, b, 15); } -// CHECK-LABEL: define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vextq_p8( // CHECK: [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> // CHECK: ret <16 x i8> [[VEXT]] poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) { return vextq_p8(a, b, 15); } -// CHECK-LABEL: define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vextq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -3325,7 +3108,7 @@ int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) { return vextq_s16(a, b, 7); } -// CHECK-LABEL: define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vextq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -3336,7 +3119,7 @@ uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) { return vextq_u16(a, b, 7); } -// CHECK-LABEL: define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vextq_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -3347,7 +3130,7 @@ poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) { return vextq_p16(a, b, 7); } -// CHECK-LABEL: define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vextq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -3358,7 +3141,7 @@ int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) { return vextq_s32(a, b, 3); } -// CHECK-LABEL: define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vextq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -3369,7 +3152,7 @@ uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) { return vextq_u32(a, b, 3); } -// CHECK-LABEL: define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vextq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -3380,7 +3163,7 @@ int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) { return vextq_s64(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vextq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -3391,7 +3174,7 @@ uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) { return vextq_u64(a, b, 1); } -// CHECK-LABEL: define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vextq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> @@ -3402,155 +3185,140 @@ float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) { return vextq_f32(a, b, 3); } - -// CHECK-LABEL: define <2 x float> @test_vfma_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { +// CHECK-LABEL: @test_vfma_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4 -// CHECK: ret <2 x float> [[TMP6]] +// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a) #4 +// CHECK: ret <2 x float> [[TMP3]] float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) { return vfma_f32(a, b, c); } -// CHECK-LABEL: define <4 x float> @test_vfmaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +// CHECK-LABEL: @test_vfmaq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4 -// CHECK: ret <4 x float> [[TMP6]] +// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a) #4 +// CHECK: ret <4 x float> [[TMP3]] float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { return vfmaq_f32(a, b, c); } -// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { +// CHECK-LABEL: @test_vfms_f32( // CHECK: [[SUB_I:%.*]] = fsub <2 x float> , %b // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4 -// CHECK: ret <2 x float> [[TMP6]] +// CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %c, <2 x float> %a) #4 +// CHECK: ret <2 x float> [[TMP3]] float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) { return vfms_f32(a, b, c); } -// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +// CHECK-LABEL: @test_vfmsq_f32( // CHECK: [[SUB_I:%.*]] = fsub <4 x float> , %b // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4 -// CHECK: ret <4 x float> [[TMP6]] +// CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %c, <4 x float> %a) #4 +// CHECK: ret <4 x float> [[TMP3]] float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { return vfmsq_f32(a, b, c); } - -// CHECK-LABEL: define <8 x i8> @test_vget_high_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vget_high_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] int8x8_t test_vget_high_s8(int8x16_t a) { return vget_high_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vget_high_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vget_high_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] int16x4_t test_vget_high_s16(int16x8_t a) { return vget_high_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vget_high_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vget_high_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] int32x2_t test_vget_high_s32(int32x4_t a) { return vget_high_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vget_high_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vget_high_s64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> // CHECK: ret <1 x i64> [[SHUFFLE_I]] int64x1_t test_vget_high_s64(int64x2_t a) { return vget_high_s64(a); } -// CHECK-LABEL: define <4 x half> @test_vget_high_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vget_high_f16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> // CHECK: ret <4 x half> [[SHUFFLE_I]] float16x4_t test_vget_high_f16(float16x8_t a) { return vget_high_f16(a); } -// CHECK-LABEL: define <2 x float> @test_vget_high_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vget_high_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> // CHECK: ret <2 x float> [[SHUFFLE_I]] float32x2_t test_vget_high_f32(float32x4_t a) { return vget_high_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vget_high_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vget_high_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] uint8x8_t test_vget_high_u8(uint8x16_t a) { return vget_high_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vget_high_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vget_high_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] uint16x4_t test_vget_high_u16(uint16x8_t a) { return vget_high_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vget_high_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vget_high_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] uint32x2_t test_vget_high_u32(uint32x4_t a) { return vget_high_u32(a); } -// CHECK-LABEL: define <1 x i64> @test_vget_high_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vget_high_u64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> // CHECK: ret <1 x i64> [[SHUFFLE_I]] uint64x1_t test_vget_high_u64(uint64x2_t a) { return vget_high_u64(a); } -// CHECK-LABEL: define <8 x i8> @test_vget_high_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vget_high_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] poly8x8_t test_vget_high_p8(poly8x16_t a) { return vget_high_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vget_high_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vget_high_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] poly16x4_t test_vget_high_p16(poly16x8_t a) { return vget_high_p16(a); } - -// CHECK-LABEL: define zeroext i8 @test_vget_lane_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vget_lane_u8( // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7 // CHECK: ret i8 [[VGET_LANE]] uint8_t test_vget_lane_u8(uint8x8_t a) { return vget_lane_u8(a, 7); } -// CHECK-LABEL: define zeroext i16 @test_vget_lane_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vget_lane_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 @@ -3559,7 +3327,7 @@ uint16_t test_vget_lane_u16(uint16x4_t a) { return vget_lane_u16(a, 3); } -// CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vget_lane_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 @@ -3568,14 +3336,14 @@ uint32_t test_vget_lane_u32(uint32x2_t a) { return vget_lane_u32(a, 1); } -// CHECK-LABEL: define signext i8 @test_vget_lane_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vget_lane_s8( // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7 // CHECK: ret i8 [[VGET_LANE]] int8_t test_vget_lane_s8(int8x8_t a) { return vget_lane_s8(a, 7); } -// CHECK-LABEL: define signext i16 @test_vget_lane_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vget_lane_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 @@ -3584,7 +3352,7 @@ int16_t test_vget_lane_s16(int16x4_t a) { return vget_lane_s16(a, 3); } -// CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vget_lane_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 @@ -3593,14 +3361,14 @@ int32_t test_vget_lane_s32(int32x2_t a) { return vget_lane_s32(a, 1); } -// CHECK-LABEL: define signext i8 @test_vget_lane_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vget_lane_p8( // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7 // CHECK: ret i8 [[VGET_LANE]] poly8_t test_vget_lane_p8(poly8x8_t a) { return vget_lane_p8(a, 7); } -// CHECK-LABEL: define signext i16 @test_vget_lane_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vget_lane_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 @@ -3609,7 +3377,7 @@ poly16_t test_vget_lane_p16(poly16x4_t a) { return vget_lane_p16(a, 3); } -// CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vget_lane_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 @@ -3618,7 +3386,7 @@ float32_t test_vget_lane_f32(float32x2_t a) { return vget_lane_f32(a, 1); } -// CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vget_lane_f16( // CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8 // CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2 // CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8 @@ -3636,14 +3404,14 @@ float32_t test_vget_lane_f16(float16x4_t a) { return vget_lane_f16(a, 1); } -// CHECK-LABEL: define zeroext i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_u8( // CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15 // CHECK: ret i8 [[VGET_LANE]] uint8_t test_vgetq_lane_u8(uint8x16_t a) { return vgetq_lane_u8(a, 15); } -// CHECK-LABEL: define zeroext i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 @@ -3652,7 +3420,7 @@ uint16_t test_vgetq_lane_u16(uint16x8_t a) { return vgetq_lane_u16(a, 7); } -// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 @@ -3661,14 +3429,14 @@ uint32_t test_vgetq_lane_u32(uint32x4_t a) { return vgetq_lane_u32(a, 3); } -// CHECK-LABEL: define signext i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_s8( // CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15 // CHECK: ret i8 [[VGET_LANE]] int8_t test_vgetq_lane_s8(int8x16_t a) { return vgetq_lane_s8(a, 15); } -// CHECK-LABEL: define signext i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 @@ -3677,7 +3445,7 @@ int16_t test_vgetq_lane_s16(int16x8_t a) { return vgetq_lane_s16(a, 7); } -// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 @@ -3686,14 +3454,14 @@ int32_t test_vgetq_lane_s32(int32x4_t a) { return vgetq_lane_s32(a, 3); } -// CHECK-LABEL: define signext i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_p8( // CHECK: [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15 // CHECK: ret i8 [[VGET_LANE]] poly8_t test_vgetq_lane_p8(poly8x16_t a) { return vgetq_lane_p8(a, 15); } -// CHECK-LABEL: define signext i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 @@ -3702,7 +3470,7 @@ poly16_t test_vgetq_lane_p16(poly16x8_t a) { return vgetq_lane_p16(a, 7); } -// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 @@ -3711,7 +3479,7 @@ float32_t test_vgetq_lane_f32(float32x4_t a) { return vgetq_lane_f32(a, 3); } -// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_f16( // CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16 // CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2 // CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16 @@ -3729,8 +3497,7 @@ float32_t test_vgetq_lane_f16(float16x8_t a) { return vgetq_lane_f16(a, 3); } -// The optimizer is able to remove all moves now. -// CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vget_lane_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 @@ -3739,8 +3506,7 @@ int64_t test_vget_lane_s64(int64x1_t a) { return vget_lane_s64(a, 0); } -// The optimizer is able to remove all moves now. -// CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vget_lane_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 @@ -3749,7 +3515,7 @@ uint64_t test_vget_lane_u64(uint64x1_t a) { return vget_lane_u64(a, 0); } -// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -3758,7 +3524,7 @@ int64_t test_vgetq_lane_s64(int64x2_t a) { return vgetq_lane_s64(a, 1); } -// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vgetq_lane_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 @@ -3767,366 +3533,314 @@ uint64_t test_vgetq_lane_u64(uint64x2_t a) { return vgetq_lane_u64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vget_low_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vget_low_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] int8x8_t test_vget_low_s8(int8x16_t a) { return vget_low_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vget_low_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vget_low_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] int16x4_t test_vget_low_s16(int16x8_t a) { return vget_low_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vget_low_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vget_low_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] int32x2_t test_vget_low_s32(int32x4_t a) { return vget_low_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vget_low_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vget_low_s64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer // CHECK: ret <1 x i64> [[SHUFFLE_I]] int64x1_t test_vget_low_s64(int64x2_t a) { return vget_low_s64(a); } -// CHECK-LABEL: define <4 x half> @test_vget_low_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vget_low_f16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> // CHECK: ret <4 x half> [[SHUFFLE_I]] float16x4_t test_vget_low_f16(float16x8_t a) { return vget_low_f16(a); } -// CHECK-LABEL: define <2 x float> @test_vget_low_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vget_low_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> // CHECK: ret <2 x float> [[SHUFFLE_I]] float32x2_t test_vget_low_f32(float32x4_t a) { return vget_low_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vget_low_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vget_low_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] uint8x8_t test_vget_low_u8(uint8x16_t a) { return vget_low_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vget_low_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vget_low_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] uint16x4_t test_vget_low_u16(uint16x8_t a) { return vget_low_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vget_low_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vget_low_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] uint32x2_t test_vget_low_u32(uint32x4_t a) { return vget_low_u32(a); } -// CHECK-LABEL: define <1 x i64> @test_vget_low_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vget_low_u64( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer // CHECK: ret <1 x i64> [[SHUFFLE_I]] uint64x1_t test_vget_low_u64(uint64x2_t a) { return vget_low_u64(a); } -// CHECK-LABEL: define <8 x i8> @test_vget_low_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vget_low_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] poly8x8_t test_vget_low_p8(poly8x16_t a) { return vget_low_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vget_low_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vget_low_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] poly16x4_t test_vget_low_p16(poly16x8_t a) { return vget_low_p16(a); } - -// CHECK-LABEL: define <8 x i8> @test_vhadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vhadd_s8( // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VHADD_V_I]] int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) { return vhadd_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vhadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vhadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VHADD_V2_I]] int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) { return vhadd_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vhadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vhadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VHADD_V2_I]] int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) { return vhadd_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vhadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vhadd_u8( // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VHADD_V_I]] uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) { return vhadd_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vhadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vhadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> [[VHADD_V_I]], <4 x i16> [[VHADD_V1_I]]) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VHADD_V2_I]] uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) { return vhadd_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vhadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vhadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> [[VHADD_V_I]], <2 x i32> [[VHADD_V1_I]]) #4 +// CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHADD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VHADD_V2_I]] uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) { return vhadd_u32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vhaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vhaddq_s8( // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VHADDQ_V_I]] int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) { return vhaddq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vhaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vhaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VHADDQ_V2_I]] int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) { return vhaddq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vhaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vhaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VHADDQ_V2_I]] int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) { return vhaddq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vhaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vhaddq_u8( // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VHADDQ_V_I]] uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) { return vhaddq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vhaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vhaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> [[VHADDQ_V_I]], <8 x i16> [[VHADDQ_V1_I]]) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VHADDQ_V2_I]] uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) { return vhaddq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vhaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vhaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> [[VHADDQ_V_I]], <4 x i32> [[VHADDQ_V1_I]]) #4 +// CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHADDQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VHADDQ_V2_I]] uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) { return vhaddq_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vhsub_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vhsub_s8( // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VHSUB_V_I]] int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) { return vhsub_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vhsub_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vhsub_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VHSUB_V2_I]] int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) { return vhsub_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vhsub_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vhsub_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VHSUB_V2_I]] int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) { return vhsub_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vhsub_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vhsub_u8( // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VHSUB_V_I]] uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) { return vhsub_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vhsub_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vhsub_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> [[VHSUB_V_I]], <4 x i16> [[VHSUB_V1_I]]) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VHSUB_V2_I]] uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) { return vhsub_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vhsub_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vhsub_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VHSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VHSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> [[VHSUB_V_I]], <2 x i32> [[VHSUB_V1_I]]) #4 +// CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VHSUB_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VHSUB_V2_I]] uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) { return vhsub_u32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vhsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vhsubq_s8( // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]] int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) { return vhsubq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vhsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vhsubq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]] int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) { return vhsubq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vhsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vhsubq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]] int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) { return vhsubq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vhsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vhsubq_u8( // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]] uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) { return vhsubq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vhsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vhsubq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> [[VHSUBQ_V_I]], <8 x i16> [[VHSUBQ_V1_I]]) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VHSUBQ_V2_I]] uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) { return vhsubq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vhsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vhsubq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VHSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VHSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> [[VHSUBQ_V_I]], <4 x i32> [[VHSUBQ_V1_I]]) #4 +// CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VHSUBQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VHSUBQ_V2_I]] uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) { return vhsubq_u32(a, b); } - -// CHECK-LABEL: define <16 x i8> @test_vld1q_u8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_u8( // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1) // CHECK: ret <16 x i8> [[VLD1]] uint8x16_t test_vld1q_u8(uint8_t const * a) { return vld1q_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vld1q_u16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2) // CHECK: ret <8 x i16> [[VLD1]] @@ -4134,7 +3848,7 @@ uint16x8_t test_vld1q_u16(uint16_t const * a) { return vld1q_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vld1q_u32(i32* %a) #0 { +// CHECK-LABEL: @test_vld1q_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4) // CHECK: ret <4 x i32> [[VLD1]] @@ -4142,7 +3856,7 @@ uint32x4_t test_vld1q_u32(uint32_t const * a) { return vld1q_u32(a); } -// CHECK-LABEL: define <2 x i64> @test_vld1q_u64(i64* %a) #0 { +// CHECK-LABEL: @test_vld1q_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4) // CHECK: ret <2 x i64> [[VLD1]] @@ -4150,14 +3864,14 @@ uint64x2_t test_vld1q_u64(uint64_t const * a) { return vld1q_u64(a); } -// CHECK-LABEL: define <16 x i8> @test_vld1q_s8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_s8( // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1) // CHECK: ret <16 x i8> [[VLD1]] int8x16_t test_vld1q_s8(int8_t const * a) { return vld1q_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vld1q_s16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2) // CHECK: ret <8 x i16> [[VLD1]] @@ -4165,7 +3879,7 @@ int16x8_t test_vld1q_s16(int16_t const * a) { return vld1q_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vld1q_s32(i32* %a) #0 { +// CHECK-LABEL: @test_vld1q_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4) // CHECK: ret <4 x i32> [[VLD1]] @@ -4173,7 +3887,7 @@ int32x4_t test_vld1q_s32(int32_t const * a) { return vld1q_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vld1q_s64(i64* %a) #0 { +// CHECK-LABEL: @test_vld1q_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4) // CHECK: ret <2 x i64> [[VLD1]] @@ -4181,7 +3895,7 @@ int64x2_t test_vld1q_s64(int64_t const * a) { return vld1q_s64(a); } -// CHECK-LABEL: define <8 x half> @test_vld1q_f16(half* %a) #0 { +// CHECK-LABEL: @test_vld1q_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2) // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VLD1]] to <8 x half> @@ -4190,7 +3904,7 @@ float16x8_t test_vld1q_f16(float16_t const * a) { return vld1q_f16(a); } -// CHECK-LABEL: define <4 x float> @test_vld1q_f32(float* %a) #0 { +// CHECK-LABEL: @test_vld1q_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* [[TMP0]], i32 4) // CHECK: ret <4 x float> [[VLD1]] @@ -4198,14 +3912,14 @@ float32x4_t test_vld1q_f32(float32_t const * a) { return vld1q_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vld1q_p8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_p8( // CHECK: [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1) // CHECK: ret <16 x i8> [[VLD1]] poly8x16_t test_vld1q_p8(poly8_t const * a) { return vld1q_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vld1q_p16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2) // CHECK: ret <8 x i16> [[VLD1]] @@ -4213,14 +3927,14 @@ poly16x8_t test_vld1q_p16(poly16_t const * a) { return vld1q_p16(a); } -// CHECK-LABEL: define <8 x i8> @test_vld1_u8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_u8( // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1) // CHECK: ret <8 x i8> [[VLD1]] uint8x8_t test_vld1_u8(uint8_t const * a) { return vld1_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vld1_u16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2) // CHECK: ret <4 x i16> [[VLD1]] @@ -4228,7 +3942,7 @@ uint16x4_t test_vld1_u16(uint16_t const * a) { return vld1_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vld1_u32(i32* %a) #0 { +// CHECK-LABEL: @test_vld1_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4) // CHECK: ret <2 x i32> [[VLD1]] @@ -4236,7 +3950,7 @@ uint32x2_t test_vld1_u32(uint32_t const * a) { return vld1_u32(a); } -// CHECK-LABEL: define <1 x i64> @test_vld1_u64(i64* %a) #0 { +// CHECK-LABEL: @test_vld1_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4) // CHECK: ret <1 x i64> [[VLD1]] @@ -4244,14 +3958,14 @@ uint64x1_t test_vld1_u64(uint64_t const * a) { return vld1_u64(a); } -// CHECK-LABEL: define <8 x i8> @test_vld1_s8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_s8( // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1) // CHECK: ret <8 x i8> [[VLD1]] int8x8_t test_vld1_s8(int8_t const * a) { return vld1_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vld1_s16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2) // CHECK: ret <4 x i16> [[VLD1]] @@ -4259,7 +3973,7 @@ int16x4_t test_vld1_s16(int16_t const * a) { return vld1_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vld1_s32(i32* %a) #0 { +// CHECK-LABEL: @test_vld1_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4) // CHECK: ret <2 x i32> [[VLD1]] @@ -4267,7 +3981,7 @@ int32x2_t test_vld1_s32(int32_t const * a) { return vld1_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vld1_s64(i64* %a) #0 { +// CHECK-LABEL: @test_vld1_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4) // CHECK: ret <1 x i64> [[VLD1]] @@ -4275,7 +3989,7 @@ int64x1_t test_vld1_s64(int64_t const * a) { return vld1_s64(a); } -// CHECK-LABEL: define <4 x half> @test_vld1_f16(half* %a) #0 { +// CHECK-LABEL: @test_vld1_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2) // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VLD1]] to <4 x half> @@ -4284,7 +3998,7 @@ float16x4_t test_vld1_f16(float16_t const * a) { return vld1_f16(a); } -// CHECK-LABEL: define <2 x float> @test_vld1_f32(float* %a) #0 { +// CHECK-LABEL: @test_vld1_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* [[TMP0]], i32 4) // CHECK: ret <2 x float> [[VLD1]] @@ -4292,14 +4006,14 @@ float32x2_t test_vld1_f32(float32_t const * a) { return vld1_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vld1_p8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_p8( // CHECK: [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1) // CHECK: ret <8 x i8> [[VLD1]] poly8x8_t test_vld1_p8(poly8_t const * a) { return vld1_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vld1_p16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2) // CHECK: ret <4 x i16> [[VLD1]] @@ -4307,8 +4021,7 @@ poly16x4_t test_vld1_p16(poly16_t const * a) { return vld1_p16(a); } - -// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_u8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_u8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer @@ -4317,7 +4030,7 @@ uint8x16_t test_vld1q_dup_u8(uint8_t const * a) { return vld1q_dup_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_u16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 @@ -4328,7 +4041,7 @@ uint16x8_t test_vld1q_dup_u16(uint16_t const * a) { return vld1q_dup_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_u32(i32* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -4339,7 +4052,7 @@ uint32x4_t test_vld1q_dup_u32(uint32_t const * a) { return vld1q_dup_u32(a); } -// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_u64(i64* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4 @@ -4350,7 +4063,7 @@ uint64x2_t test_vld1q_dup_u64(uint64_t const * a) { return vld1q_dup_u64(a); } -// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_s8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_s8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer @@ -4359,7 +4072,7 @@ int8x16_t test_vld1q_dup_s8(int8_t const * a) { return vld1q_dup_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_s16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 @@ -4370,7 +4083,7 @@ int16x8_t test_vld1q_dup_s16(int16_t const * a) { return vld1q_dup_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vld1q_dup_s32(i32* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -4381,7 +4094,7 @@ int32x4_t test_vld1q_dup_s32(int32_t const * a) { return vld1q_dup_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vld1q_dup_s64(i64* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4 @@ -4392,7 +4105,7 @@ int64x2_t test_vld1q_dup_s64(int64_t const * a) { return vld1q_dup_s64(a); } -// CHECK-LABEL: define <8 x half> @test_vld1q_dup_f16(half* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 @@ -4404,7 +4117,7 @@ float16x8_t test_vld1q_dup_f16(float16_t const * a) { return vld1q_dup_f16(a); } -// CHECK-LABEL: define <4 x float> @test_vld1q_dup_f32(float* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float* // CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4 @@ -4415,7 +4128,7 @@ float32x4_t test_vld1q_dup_f32(float32_t const * a) { return vld1q_dup_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vld1q_dup_p8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_p8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0 // CHECK: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer @@ -4424,7 +4137,7 @@ poly8x16_t test_vld1q_dup_p8(poly8_t const * a) { return vld1q_dup_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vld1q_dup_p16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1q_dup_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 @@ -4435,7 +4148,7 @@ poly16x8_t test_vld1q_dup_p16(poly16_t const * a) { return vld1q_dup_p16(a); } -// CHECK-LABEL: define <8 x i8> @test_vld1_dup_u8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_u8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer @@ -4444,7 +4157,7 @@ uint8x8_t test_vld1_dup_u8(uint8_t const * a) { return vld1_dup_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vld1_dup_u16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 @@ -4455,7 +4168,7 @@ uint16x4_t test_vld1_dup_u16(uint16_t const * a) { return vld1_dup_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vld1_dup_u32(i32* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -4466,7 +4179,7 @@ uint32x2_t test_vld1_dup_u32(uint32_t const * a) { return vld1_dup_u32(a); } -// CHECK-LABEL: define <1 x i64> @test_vld1_dup_u64(i64* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4 @@ -4477,7 +4190,7 @@ uint64x1_t test_vld1_dup_u64(uint64_t const * a) { return vld1_dup_u64(a); } -// CHECK-LABEL: define <8 x i8> @test_vld1_dup_s8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_s8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer @@ -4486,7 +4199,7 @@ int8x8_t test_vld1_dup_s8(int8_t const * a) { return vld1_dup_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vld1_dup_s16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 @@ -4497,7 +4210,7 @@ int16x4_t test_vld1_dup_s16(int16_t const * a) { return vld1_dup_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vld1_dup_s32(i32* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* // CHECK: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -4508,7 +4221,7 @@ int32x2_t test_vld1_dup_s32(int32_t const * a) { return vld1_dup_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vld1_dup_s64(i64* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64* // CHECK: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4 @@ -4519,7 +4232,7 @@ int64x1_t test_vld1_dup_s64(int64_t const * a) { return vld1_dup_s64(a); } -// CHECK-LABEL: define <4 x half> @test_vld1_dup_f16(half* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 @@ -4531,7 +4244,7 @@ float16x4_t test_vld1_dup_f16(float16_t const * a) { return vld1_dup_f16(a); } -// CHECK-LABEL: define <2 x float> @test_vld1_dup_f32(float* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float* // CHECK: [[TMP2:%.*]] = load float, float* [[TMP1]], align 4 @@ -4542,7 +4255,7 @@ float32x2_t test_vld1_dup_f32(float32_t const * a) { return vld1_dup_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vld1_dup_p8(i8* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_p8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0 // CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer @@ -4551,7 +4264,7 @@ poly8x8_t test_vld1_dup_p8(poly8_t const * a) { return vld1_dup_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vld1_dup_p16(i16* %a) #0 { +// CHECK-LABEL: @test_vld1_dup_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16* // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 @@ -4562,8 +4275,7 @@ poly16x4_t test_vld1_dup_p16(poly16_t const * a) { return vld1_dup_p16(a); } - -// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_u8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_u8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 // CHECK: ret <16 x i8> [[VLD1_LANE]] @@ -4571,7 +4283,7 @@ uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) { return vld1q_lane_u8(a, b, 15); } -// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_u16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -4583,7 +4295,7 @@ uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) { return vld1q_lane_u16(a, b, 7); } -// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_u32(i32* %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> @@ -4595,7 +4307,7 @@ uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) { return vld1q_lane_u32(a, b, 3); } -// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_u64(i64* %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> @@ -4607,7 +4319,7 @@ uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) { return vld1q_lane_u64(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_s8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_s8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 // CHECK: ret <16 x i8> [[VLD1_LANE]] @@ -4615,7 +4327,7 @@ int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) { return vld1q_lane_s8(a, b, 15); } -// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_s16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -4627,7 +4339,7 @@ int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) { return vld1q_lane_s16(a, b, 7); } -// CHECK-LABEL: define <4 x i32> @test_vld1q_lane_s32(i32* %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> @@ -4639,7 +4351,7 @@ int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) { return vld1q_lane_s32(a, b, 3); } -// CHECK-LABEL: define <2 x i64> @test_vld1q_lane_s64(i64* %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> @@ -4651,7 +4363,7 @@ int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) { return vld1q_lane_s64(a, b, 1); } -// CHECK-LABEL: define <8 x half> @test_vld1q_lane_f16(half* %a, <8 x half> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -4664,7 +4376,7 @@ float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) { return vld1q_lane_f16(a, b, 7); } -// CHECK-LABEL: define <4 x float> @test_vld1q_lane_f32(float* %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> @@ -4676,7 +4388,7 @@ float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) { return vld1q_lane_f32(a, b, 3); } -// CHECK-LABEL: define <16 x i8> @test_vld1q_lane_p8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_p8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15 // CHECK: ret <16 x i8> [[VLD1_LANE]] @@ -4684,7 +4396,7 @@ poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) { return vld1q_lane_p8(a, b, 15); } -// CHECK-LABEL: define <8 x i16> @test_vld1q_lane_p16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vld1q_lane_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -4696,7 +4408,7 @@ poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) { return vld1q_lane_p16(a, b, 7); } -// CHECK-LABEL: define <8 x i8> @test_vld1_lane_u8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_u8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7 // CHECK: ret <8 x i8> [[VLD1_LANE]] @@ -4704,7 +4416,7 @@ uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) { return vld1_lane_u8(a, b, 7); } -// CHECK-LABEL: define <4 x i16> @test_vld1_lane_u16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -4716,7 +4428,7 @@ uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) { return vld1_lane_u16(a, b, 3); } -// CHECK-LABEL: define <2 x i32> @test_vld1_lane_u32(i32* %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> @@ -4728,7 +4440,7 @@ uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) { return vld1_lane_u32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vld1_lane_u64(i64* %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> @@ -4740,7 +4452,7 @@ uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) { return vld1_lane_u64(a, b, 0); } -// CHECK-LABEL: define <8 x i8> @test_vld1_lane_s8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_s8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7 // CHECK: ret <8 x i8> [[VLD1_LANE]] @@ -4748,7 +4460,7 @@ int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) { return vld1_lane_s8(a, b, 7); } -// CHECK-LABEL: define <4 x i16> @test_vld1_lane_s16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -4760,7 +4472,7 @@ int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) { return vld1_lane_s16(a, b, 3); } -// CHECK-LABEL: define <2 x i32> @test_vld1_lane_s32(i32* %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> @@ -4772,7 +4484,7 @@ int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) { return vld1_lane_s32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vld1_lane_s64(i64* %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> @@ -4784,7 +4496,7 @@ int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) { return vld1_lane_s64(a, b, 0); } -// CHECK-LABEL: define <4 x half> @test_vld1_lane_f16(half* %a, <4 x half> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -4797,7 +4509,7 @@ float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) { return vld1_lane_f16(a, b, 3); } -// CHECK-LABEL: define <2 x float> @test_vld1_lane_f32(float* %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> @@ -4809,7 +4521,7 @@ float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) { return vld1_lane_f32(a, b, 1); } -// CHECK-LABEL: define <8 x i8> @test_vld1_lane_p8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_p8( // CHECK: [[TMP0:%.*]] = load i8, i8* %a, align 1 // CHECK: [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7 // CHECK: ret <8 x i8> [[VLD1_LANE]] @@ -4817,7 +4529,7 @@ poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) { return vld1_lane_p8(a, b, 7); } -// CHECK-LABEL: define <4 x i16> @test_vld1_lane_p16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vld1_lane_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -4829,571 +4541,304 @@ poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) { return vld1_lane_p16(a, b, 3); } - -// CHECK-LABEL: define void @test_vld2q_u8(%struct.uint8x16x2_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld2q_u8( // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* -// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> uint8x16x2_t test_vld2q_u8(uint8_t const * a) { return vld2q_u8(a); } -// CHECK-LABEL: define void @test_vld2q_u16(%struct.uint16x8x2_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld2q_u16( // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> uint16x8x2_t test_vld2q_u16(uint16_t const * a) { return vld2q_u16(a); } -// CHECK-LABEL: define void @test_vld2q_u32(%struct.uint32x4x2_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld2q_u32( // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_V]], { <4 x i32>, <4 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> uint32x4x2_t test_vld2q_u32(uint32_t const * a) { return vld2q_u32(a); } -// CHECK-LABEL: define void @test_vld2q_s8(%struct.int8x16x2_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld2q_s8( // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* -// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> int8x16x2_t test_vld2q_s8(int8_t const * a) { return vld2q_s8(a); } -// CHECK-LABEL: define void @test_vld2q_s16(%struct.int16x8x2_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld2q_s16( // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> int16x8x2_t test_vld2q_s16(int16_t const * a) { return vld2q_s16(a); } -// CHECK-LABEL: define void @test_vld2q_s32(%struct.int32x4x2_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld2q_s32( // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_V]], { <4 x i32>, <4 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32> int32x4x2_t test_vld2q_s32(int32_t const * a) { return vld2q_s32(a); } -// CHECK-LABEL: define void @test_vld2q_f16(%struct.float16x8x2_t* noalias sret %agg.result, half* %a) #0 { +// CHECK-LABEL: @test_vld2q_f16( // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> float16x8x2_t test_vld2q_f16(float16_t const * a) { return vld2q_f16(a); } -// CHECK-LABEL: define void @test_vld2q_f32(%struct.float32x4x2_t* noalias sret %agg.result, float* %a) #0 { +// CHECK-LABEL: @test_vld2q_f32( // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2.v4f32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float> } [[VLD2Q_V]], { <4 x float>, <4 x float> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float> float32x4x2_t test_vld2q_f32(float32_t const * a) { return vld2q_f32(a); } -// CHECK-LABEL: define void @test_vld2q_p8(%struct.poly8x16x2_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld2q_p8( // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* -// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2.v16i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8> } [[VLD2Q_V]], { <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8> poly8x16x2_t test_vld2q_p8(poly8_t const * a) { return vld2q_p8(a); } -// CHECK-LABEL: define void @test_vld2q_p16(%struct.poly16x8x2_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld2q_p16( // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_V]], { <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16> poly16x8x2_t test_vld2q_p16(poly16_t const * a) { return vld2q_p16(a); } -// CHECK-LABEL: define void @test_vld2_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld2_u8( // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> uint8x8x2_t test_vld2_u8(uint8_t const * a) { return vld2_u8(a); } -// CHECK-LABEL: define void @test_vld2_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld2_u16( // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> uint16x4x2_t test_vld2_u16(uint16_t const * a) { return vld2_u16(a); } -// CHECK-LABEL: define void @test_vld2_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld2_u32( // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_V]], { <2 x i32>, <2 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> uint32x2x2_t test_vld2_u32(uint32_t const * a) { return vld2_u32(a); } -// CHECK-LABEL: define void @test_vld2_u64(%struct.uint64x1x2_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld2_u64( // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_V]], { <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> uint64x1x2_t test_vld2_u64(uint64_t const * a) { return vld2_u64(a); } -// CHECK-LABEL: define void @test_vld2_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld2_s8( // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> int8x8x2_t test_vld2_s8(int8_t const * a) { return vld2_s8(a); } -// CHECK-LABEL: define void @test_vld2_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld2_s16( // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> int16x4x2_t test_vld2_s16(int16_t const * a) { return vld2_s16(a); } -// CHECK-LABEL: define void @test_vld2_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld2_s32( // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2.v2i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_V]], { <2 x i32>, <2 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32> int32x2x2_t test_vld2_s32(int32_t const * a) { return vld2_s32(a); } -// CHECK-LABEL: define void @test_vld2_s64(%struct.int64x1x2_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld2_s64( // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64> } [[VLD2_V]], { <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64> int64x1x2_t test_vld2_s64(int64_t const * a) { return vld2_s64(a); } -// CHECK-LABEL: define void @test_vld2_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a) #0 { +// CHECK-LABEL: @test_vld2_f16( // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> float16x4x2_t test_vld2_f16(float16_t const * a) { return vld2_f16(a); } -// CHECK-LABEL: define void @test_vld2_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a) #0 { +// CHECK-LABEL: @test_vld2_f32( // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[VLD2_V:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2.v2f32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float> } [[VLD2_V]], { <2 x float>, <2 x float> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <2 x float>, <2 x float> float32x2x2_t test_vld2_f32(float32_t const * a) { return vld2_f32(a); } -// CHECK-LABEL: define void @test_vld2_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld2_p8( // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2.v8i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_V]], { <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8> poly8x8x2_t test_vld2_p8(poly8_t const * a) { return vld2_p8(a); } -// CHECK-LABEL: define void @test_vld2_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld2_p16( // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_V]], { <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16> poly16x4x2_t test_vld2_p16(poly16_t const * a) { return vld2_p16(a); } - -// CHECK-LABEL: define void @test_vld2_dup_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_u8( // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) -// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer -// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 -// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer -// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 -// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]] -// CHECK: [[TMP6:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> uint8x8x2_t test_vld2_dup_u8(uint8_t const * a) { return vld2_dup_u8(a); } -// CHECK-LABEL: define void @test_vld2_dup_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_u16( // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> uint16x4x2_t test_vld2_dup_u16(uint16_t const * a) { return vld2_dup_u16(a); } -// CHECK-LABEL: define void @test_vld2_dup_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_u32( // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) -// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32> } [[TMP5]], { <2 x i32>, <2 x i32> }* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32> uint32x2x2_t test_vld2_dup_u32(uint32_t const * a) { return vld2_dup_u32(a); } -// CHECK-LABEL: define void @test_vld2_dup_u64(%struct.uint64x1x2_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_u64( // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64> uint64x1x2_t test_vld2_dup_u64(uint64_t const * a) { return vld2_dup_u64(a); } -// CHECK-LABEL: define void @test_vld2_dup_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_s8( // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) -// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer -// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 -// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer -// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 -// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]] -// CHECK: [[TMP6:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* -// CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> int8x8x2_t test_vld2_dup_s8(int8_t const * a) { return vld2_dup_s8(a); } -// CHECK-LABEL: define void @test_vld2_dup_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_s16( // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> int16x4x2_t test_vld2_dup_s16(int16_t const * a) { return vld2_dup_s16(a); } -// CHECK-LABEL: define void @test_vld2_dup_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_s32( // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) -// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32> } [[TMP5]], { <2 x i32>, <2 x i32> }* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32> int32x2x2_t test_vld2_dup_s32(int32_t const * a) { return vld2_dup_s32(a); } -// CHECK-LABEL: define void @test_vld2_dup_s64(%struct.int64x1x2_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_s64( // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.arm.neon.vld2.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x2_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64> int64x1x2_t test_vld2_dup_s64(int64_t const * a) { return vld2_dup_s64(a); } -// CHECK-LABEL: define void @test_vld2_dup_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_f16( // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> float16x4x2_t test_vld2_dup_f16(float16_t const * a) { return vld2_dup_f16(a); } -// CHECK-LABEL: define void @test_vld2_dup_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_f32( // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, i32 0, i32 4) -// CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float> } [[TMP5]], { <2 x float>, <2 x float> }* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float> float32x2x2_t test_vld2_dup_f32(float32_t const * a) { return vld2_dup_f32(a); } -// CHECK-LABEL: define void @test_vld2_dup_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_p8( // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) -// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer -// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 -// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP2]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer -// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 -// CHECK: [[TMP5:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[TMP4]], { <8 x i8>, <8 x i8> }* [[TMP5]] -// CHECK: [[TMP6:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP6]], i8* [[TMP7]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8> poly8x8x2_t test_vld2_dup_p8(poly8_t const * a) { return vld2_dup_p8(a); } -// CHECK-LABEL: define void @test_vld2_dup_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld2_dup_p16( // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[TMP5]], { <4 x i16>, <4 x i16> }* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16> poly16x4x2_t test_vld2_dup_p16(poly16_t const * a) { return vld2_dup_p16(a); } - -// CHECK-LABEL: define void @test_vld2q_lane_u16(%struct.uint16x8x2_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2q_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 @@ -5415,18 +4860,12 @@ poly16x4x2_t test_vld2_dup_p16(poly16_t const * a) { // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) { return vld2q_lane_u16(a, b, 7); } -// CHECK-LABEL: define void @test_vld2q_lane_u32(%struct.uint32x4x2_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2q_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 @@ -5448,18 +4887,12 @@ uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> -// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], i32 3, i32 4) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], { <4 x i32>, <4 x i32> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) { return vld2q_lane_u32(a, b, 3); } -// CHECK-LABEL: define void @test_vld2q_lane_s16(%struct.int16x8x2_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2q_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 @@ -5481,18 +4914,12 @@ uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) { return vld2q_lane_s16(a, b, 7); } -// CHECK-LABEL: define void @test_vld2q_lane_s32(%struct.int32x4x2_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2q_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 @@ -5514,18 +4941,12 @@ int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> -// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP9]], <4 x i32> [[TMP10]], i32 3, i32 4) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32> } [[VLD2Q_LANE_V]], { <4 x i32>, <4 x i32> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32> int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) { return vld2q_lane_s32(a, b, 3); } -// CHECK-LABEL: define void @test_vld2q_lane_f16(%struct.float16x8x2_t* noalias sret %agg.result, half* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2q_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 @@ -5547,18 +4968,12 @@ int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x8x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) { return vld2q_lane_f16(a, b, 7); } -// CHECK-LABEL: define void @test_vld2q_lane_f32(%struct.float32x4x2_t* noalias sret %agg.result, float* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2q_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 @@ -5580,18 +4995,12 @@ float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> -// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float> } @llvm.arm.neon.vld2lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP9]], <4 x float> [[TMP10]], i32 3, i32 4) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float> } [[VLD2Q_LANE_V]], { <4 x float>, <4 x float> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float> float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) { return vld2q_lane_f32(a, b, 3); } -// CHECK-LABEL: define void @test_vld2q_lane_p16(%struct.poly16x8x2_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2q_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 @@ -5613,18 +5022,12 @@ float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> -// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP9]], <8 x i16> [[TMP10]], i32 7, i32 2) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16> } [[VLD2Q_LANE_V]], { <8 x i16>, <8 x i16> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 32, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16> poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) { return vld2q_lane_p16(a, b, 7); } -// CHECK-LABEL: define void @test_vld2_lane_u8(%struct.uint8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2_lane_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 @@ -5641,18 +5044,12 @@ poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) { // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 -// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) -// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) { return vld2_lane_u8(a, b, 7); } -// CHECK-LABEL: define void @test_vld2_lane_u16(%struct.uint16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 @@ -5674,18 +5071,12 @@ uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) { return vld2_lane_u16(a, b, 3); } -// CHECK-LABEL: define void @test_vld2_lane_u32(%struct.uint32x2x2_t* noalias sret %agg.result, i32* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 @@ -5707,18 +5098,12 @@ uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> -// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], i32 1, i32 4) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], { <2 x i32>, <2 x i32> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) { return vld2_lane_u32(a, b, 1); } -// CHECK-LABEL: define void @test_vld2_lane_s8(%struct.int8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2_lane_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 @@ -5735,18 +5120,12 @@ uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) { // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 -// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) -// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) { return vld2_lane_s8(a, b, 7); } -// CHECK-LABEL: define void @test_vld2_lane_s16(%struct.int16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 @@ -5768,18 +5147,12 @@ int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) { return vld2_lane_s16(a, b, 3); } -// CHECK-LABEL: define void @test_vld2_lane_s32(%struct.int32x2x2_t* noalias sret %agg.result, i32* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 @@ -5801,18 +5174,12 @@ int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> -// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], i32 1, i32 4) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32> } [[VLD2_LANE_V]], { <2 x i32>, <2 x i32> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32> int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) { return vld2_lane_s32(a, b, 1); } -// CHECK-LABEL: define void @test_vld2_lane_f16(%struct.float16x4x2_t* noalias sret %agg.result, half* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 @@ -5834,18 +5201,12 @@ int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) { return vld2_lane_f16(a, b, 3); } -// CHECK-LABEL: define void @test_vld2_lane_f32(%struct.float32x2x2_t* noalias sret %agg.result, float* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 @@ -5867,18 +5228,12 @@ float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> -// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float> } @llvm.arm.neon.vld2lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP9]], <2 x float> [[TMP10]], i32 1, i32 4) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float> } [[VLD2_LANE_V]], { <2 x float>, <2 x float> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float> float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) { return vld2_lane_f32(a, b, 1); } -// CHECK-LABEL: define void @test_vld2_lane_p8(%struct.poly8x8x2_t* noalias sret %agg.result, i8* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2_lane_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 @@ -5895,18 +5250,12 @@ float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) { // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1 // CHECK: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8 -// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1) -// CHECK: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8> } [[VLD2_LANE_V]], { <8 x i8>, <8 x i8> }* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8> poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) { return vld2_lane_p8(a, b, 7); } -// CHECK-LABEL: define void @test_vld2_lane_p16(%struct.poly16x4x2_t* noalias sret %agg.result, i16* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld2_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 @@ -5928,612 +5277,309 @@ poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) { // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> -// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP9]], <4 x i16> [[TMP10]], i32 3, i32 2) -// CHECK: [[TMP11:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16> } [[VLD2_LANE_V]], { <4 x i16>, <4 x i16> }* [[TMP11]] -// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* -// CHECK: [[TMP13:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP12]], i8* [[TMP13]], i32 16, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16> poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) { return vld2_lane_p16(a, b, 3); } - -// CHECK-LABEL: define void @test_vld3q_u8(%struct.uint8x16x3_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld3q_u8( // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* -// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> uint8x16x3_t test_vld3q_u8(uint8_t const * a) { return vld3q_u8(a); } -// CHECK-LABEL: define void @test_vld3q_u16(%struct.uint16x8x3_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld3q_u16( // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> uint16x8x3_t test_vld3q_u16(uint16_t const * a) { return vld3q_u16(a); } -// CHECK-LABEL: define void @test_vld3q_u32(%struct.uint32x4x3_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld3q_u32( // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> uint32x4x3_t test_vld3q_u32(uint32_t const * a) { return vld3q_u32(a); } -// CHECK-LABEL: define void @test_vld3q_s8(%struct.int8x16x3_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld3q_s8( // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* -// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> int8x16x3_t test_vld3q_s8(int8_t const * a) { return vld3q_s8(a); } -// CHECK-LABEL: define void @test_vld3q_s16(%struct.int16x8x3_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld3q_s16( // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> int16x8x3_t test_vld3q_s16(int16_t const * a) { return vld3q_s16(a); } -// CHECK-LABEL: define void @test_vld3q_s32(%struct.int32x4x3_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld3q_s32( // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3.v4i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> int32x4x3_t test_vld3q_s32(int32_t const * a) { return vld3q_s32(a); } -// CHECK-LABEL: define void @test_vld3q_f16(%struct.float16x8x3_t* noalias sret %agg.result, half* %a) #0 { +// CHECK-LABEL: @test_vld3q_f16( // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> float16x8x3_t test_vld3q_f16(float16_t const * a) { return vld3q_f16(a); } -// CHECK-LABEL: define void @test_vld3q_f32(%struct.float32x4x3_t* noalias sret %agg.result, float* %a) #0 { +// CHECK-LABEL: @test_vld3q_f32( // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_V]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> float32x4x3_t test_vld3q_f32(float32_t const * a) { return vld3q_f32(a); } -// CHECK-LABEL: define void @test_vld3q_p8(%struct.poly8x16x3_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld3q_p8( // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* -// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3Q_V]], { <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> poly8x16x3_t test_vld3q_p8(poly8_t const * a) { return vld3q_p8(a); } -// CHECK-LABEL: define void @test_vld3q_p16(%struct.poly16x8x3_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld3q_p16( // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> poly16x8x3_t test_vld3q_p16(poly16_t const * a) { return vld3q_p16(a); } -// CHECK-LABEL: define void @test_vld3_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld3_u8( // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> uint8x8x3_t test_vld3_u8(uint8_t const * a) { return vld3_u8(a); } -// CHECK-LABEL: define void @test_vld3_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld3_u16( // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> uint16x4x3_t test_vld3_u16(uint16_t const * a) { return vld3_u16(a); } -// CHECK-LABEL: define void @test_vld3_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld3_u32( // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> uint32x2x3_t test_vld3_u32(uint32_t const * a) { return vld3_u32(a); } -// CHECK-LABEL: define void @test_vld3_u64(%struct.uint64x1x3_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld3_u64( // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> uint64x1x3_t test_vld3_u64(uint64_t const * a) { return vld3_u64(a); } -// CHECK-LABEL: define void @test_vld3_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld3_s8( // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> int8x8x3_t test_vld3_s8(int8_t const * a) { return vld3_s8(a); } -// CHECK-LABEL: define void @test_vld3_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld3_s16( // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> int16x4x3_t test_vld3_s16(int16_t const * a) { return vld3_s16(a); } -// CHECK-LABEL: define void @test_vld3_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld3_s32( // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3.v2i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> int32x2x3_t test_vld3_s32(int32_t const * a) { return vld3_s32(a); } -// CHECK-LABEL: define void @test_vld3_s64(%struct.int64x1x3_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld3_s64( // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3_V]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> int64x1x3_t test_vld3_s64(int64_t const * a) { return vld3_s64(a); } -// CHECK-LABEL: define void @test_vld3_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a) #0 { +// CHECK-LABEL: @test_vld3_f16( // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> float16x4x3_t test_vld3_f16(float16_t const * a) { return vld3_f16(a); } -// CHECK-LABEL: define void @test_vld3_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a) #0 { +// CHECK-LABEL: @test_vld3_f32( // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3.v2f32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_V]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> float32x2x3_t test_vld3_f32(float32_t const * a) { return vld3_f32(a); } -// CHECK-LABEL: define void @test_vld3_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld3_p8( // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3.v8i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> poly8x8x3_t test_vld3_p8(poly8_t const * a) { return vld3_p8(a); } -// CHECK-LABEL: define void @test_vld3_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld3_p16( // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> poly16x4x3_t test_vld3_p16(poly16_t const * a) { return vld3_p16(a); } - -// CHECK-LABEL: define void @test_vld3_dup_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_u8( // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) -// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer -// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 -// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer -// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 -// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer -// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2 -// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8* -// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> uint8x8x3_t test_vld3_dup_u8(uint8_t const * a) { return vld3_dup_u8(a); } -// CHECK-LABEL: define void @test_vld3_dup_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_u16( // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]] -// CHECK: [[TMP9:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8* -// CHECK: [[TMP10:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> uint16x4x3_t test_vld3_dup_u16(uint16_t const * a) { return vld3_dup_u16(a); } -// CHECK-LABEL: define void @test_vld3_dup_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_u32( // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) -// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP8]] -// CHECK: [[TMP9:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8* -// CHECK: [[TMP10:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> uint32x2x3_t test_vld3_dup_u32(uint32_t const * a) { return vld3_dup_u32(a); } -// CHECK-LABEL: define void @test_vld3_dup_u64(%struct.uint64x1x3_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_u64( // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> uint64x1x3_t test_vld3_dup_u64(uint64_t const * a) { return vld3_dup_u64(a); } -// CHECK-LABEL: define void @test_vld3_dup_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_s8( // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) -// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer -// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 -// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer -// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 -// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer -// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2 -// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] -// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8* -// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> int8x8x3_t test_vld3_dup_s8(int8_t const * a) { return vld3_dup_s8(a); } -// CHECK-LABEL: define void @test_vld3_dup_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_s16( // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]] -// CHECK: [[TMP9:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8* -// CHECK: [[TMP10:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> int16x4x3_t test_vld3_dup_s16(int16_t const * a) { return vld3_dup_s16(a); } -// CHECK-LABEL: define void @test_vld3_dup_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_s32( // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) -// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP8]] -// CHECK: [[TMP9:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8* -// CHECK: [[TMP10:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> int32x2x3_t test_vld3_dup_s32(int32_t const * a) { return vld3_dup_s32(a); } -// CHECK-LABEL: define void @test_vld3_dup_s64(%struct.int64x1x3_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_s64( // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld3.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x3_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> int64x1x3_t test_vld3_dup_s64(int64_t const * a) { return vld3_dup_s64(a); } -// CHECK-LABEL: define void @test_vld3_dup_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_f16( // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]] -// CHECK: [[TMP9:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8* -// CHECK: [[TMP10:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> float16x4x3_t test_vld3_dup_f16(float16_t const * a) { return vld3_dup_f16(a); } -// CHECK-LABEL: define void @test_vld3_dup_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_f32( // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4) -// CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP5]], <2 x float> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[TMP7]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP8]] -// CHECK: [[TMP9:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8* -// CHECK: [[TMP10:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float>, <2 x float> float32x2x3_t test_vld3_dup_f32(float32_t const * a) { return vld3_dup_f32(a); } -// CHECK-LABEL: define void @test_vld3_dup_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_p8( // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) -// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer -// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 -// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer -// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 -// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer -// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2 -// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8* -// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> poly8x8x3_t test_vld3_dup_p8(poly8_t const * a) { return vld3_dup_p8(a); } -// CHECK-LABEL: define void @test_vld3_dup_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld3_dup_p16( // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP8]] -// CHECK: [[TMP9:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8* -// CHECK: [[TMP10:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> poly16x4x3_t test_vld3_dup_p16(poly16_t const * a) { return vld3_dup_p16(a); } - -// CHECK-LABEL: define void @test_vld3q_lane_u16(%struct.uint16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3q_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 @@ -6560,18 +5606,12 @@ poly16x4x3_t test_vld3_dup_p16(poly16_t const * a) { // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> -// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x8x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) { return vld3q_lane_u16(a, b, 7); } -// CHECK-LABEL: define void @test_vld3q_lane_u32(%struct.uint32x4x3_t* noalias sret %agg.result, i32* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3q_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 @@ -6598,18 +5638,12 @@ uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> -// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], i32 3, i32 4) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x4x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) { return vld3q_lane_u32(a, b, 3); } -// CHECK-LABEL: define void @test_vld3q_lane_s16(%struct.int16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3q_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 @@ -6636,18 +5670,12 @@ uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> -// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x8x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) { return vld3q_lane_s16(a, b, 7); } -// CHECK-LABEL: define void @test_vld3q_lane_s32(%struct.int32x4x3_t* noalias sret %agg.result, i32* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3q_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 @@ -6674,18 +5702,12 @@ int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> -// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld3lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], i32 3, i32 4) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x4x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) { return vld3q_lane_s32(a, b, 3); } -// CHECK-LABEL: define void @test_vld3q_lane_f16(%struct.float16x8x3_t* noalias sret %agg.result, half* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3q_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 @@ -6712,18 +5734,12 @@ int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> -// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x8x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) { return vld3q_lane_f16(a, b, 7); } -// CHECK-LABEL: define void @test_vld3q_lane_f32(%struct.float32x4x3_t* noalias sret %agg.result, float* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3q_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 @@ -6750,18 +5766,12 @@ float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> -// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], i32 3, i32 4) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x float>, <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3Q_LANE_V]], { <4 x float>, <4 x float>, <4 x float> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x4x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float> float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) { return vld3q_lane_f32(a, b, 3); } -// CHECK-LABEL: define void @test_vld3q_lane_p16(%struct.poly16x8x3_t* noalias sret %agg.result, i16* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3q_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 @@ -6788,18 +5798,12 @@ float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> -// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], i32 7, i32 2) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x8x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 48, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) { return vld3q_lane_p16(a, b, 7); } -// CHECK-LABEL: define void @test_vld3_lane_u8(%struct.uint8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3_lane_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 @@ -6819,18 +5823,12 @@ poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) { // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 -// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1) -// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint8x8x3_t* %agg.result to i8* -// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) { return vld3_lane_u8(a, b, 7); } -// CHECK-LABEL: define void @test_vld3_lane_u16(%struct.uint16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 @@ -6857,18 +5855,12 @@ uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> -// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.uint16x4x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) { return vld3_lane_u16(a, b, 3); } -// CHECK-LABEL: define void @test_vld3_lane_u32(%struct.uint32x2x3_t* noalias sret %agg.result, i32* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 @@ -6895,18 +5887,12 @@ uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> -// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], i32 1, i32 4) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.uint32x2x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) { return vld3_lane_u32(a, b, 1); } -// CHECK-LABEL: define void @test_vld3_lane_s8(%struct.int8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3_lane_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 @@ -6926,18 +5912,12 @@ uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) { // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 -// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1) -// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] -// CHECK: [[TMP8:%.*]] = bitcast %struct.int8x8x3_t* %agg.result to i8* -// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) { return vld3_lane_s8(a, b, 7); } -// CHECK-LABEL: define void @test_vld3_lane_s16(%struct.int16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 @@ -6964,18 +5944,12 @@ int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> -// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.int16x4x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) { return vld3_lane_s16(a, b, 3); } -// CHECK-LABEL: define void @test_vld3_lane_s32(%struct.int32x2x3_t* noalias sret %agg.result, i32* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 @@ -7002,18 +5976,12 @@ int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> -// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld3lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], i32 1, i32 4) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.int32x2x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) { return vld3_lane_s32(a, b, 1); } -// CHECK-LABEL: define void @test_vld3_lane_f16(%struct.float16x4x3_t* noalias sret %agg.result, half* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 @@ -7040,18 +6008,12 @@ int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> -// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.float16x4x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) { return vld3_lane_f16(a, b, 3); } -// CHECK-LABEL: define void @test_vld3_lane_f32(%struct.float32x2x3_t* noalias sret %agg.result, float* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8 @@ -7078,18 +6040,12 @@ float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> -// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld3lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], i32 1, i32 4) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3_LANE_V]], { <2 x float>, <2 x float>, <2 x float> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.float32x2x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float> float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) { return vld3_lane_f32(a, b, 1); } -// CHECK-LABEL: define void @test_vld3_lane_p8(%struct.poly8x8x3_t* noalias sret %agg.result, i8* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3_lane_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 @@ -7109,18 +6065,12 @@ float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) { // CHECK: [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2 // CHECK: [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8 -// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1) -// CHECK: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP7]] -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly8x8x3_t* %agg.result to i8* -// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP8]], i8* [[TMP9]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) { return vld3_lane_p8(a, b, 7); } -// CHECK-LABEL: define void @test_vld3_lane_p16(%struct.poly16x4x3_t* noalias sret %agg.result, i16* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld3_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 @@ -7147,642 +6097,309 @@ poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) { // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> -// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], i32 3, i32 2) -// CHECK: [[TMP14:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP14]] -// CHECK: [[TMP15:%.*]] = bitcast %struct.poly16x4x3_t* %agg.result to i8* -// CHECK: [[TMP16:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP15]], i8* [[TMP16]], i32 24, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) { return vld3_lane_p16(a, b, 3); } - -// CHECK-LABEL: define void @test_vld4q_u8(%struct.uint8x16x4_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld4q_u8( // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* -// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> uint8x16x4_t test_vld4q_u8(uint8_t const * a) { return vld4q_u8(a); } -// CHECK-LABEL: define void @test_vld4q_u16(%struct.uint16x8x4_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld4q_u16( // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> uint16x8x4_t test_vld4q_u16(uint16_t const * a) { return vld4q_u16(a); } -// CHECK-LABEL: define void @test_vld4q_u32(%struct.uint32x4x4_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld4q_u32( // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> uint32x4x4_t test_vld4q_u32(uint32_t const * a) { return vld4q_u32(a); } -// CHECK-LABEL: define void @test_vld4q_s8(%struct.int8x16x4_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld4q_s8( // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* -// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> int8x16x4_t test_vld4q_s8(int8_t const * a) { return vld4q_s8(a); } -// CHECK-LABEL: define void @test_vld4q_s16(%struct.int16x8x4_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld4q_s16( // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> int16x8x4_t test_vld4q_s16(int16_t const * a) { return vld4q_s16(a); } -// CHECK-LABEL: define void @test_vld4q_s32(%struct.int32x4x4_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld4q_s32( // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4.v4i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> int32x4x4_t test_vld4q_s32(int32_t const * a) { return vld4q_s32(a); } -// CHECK-LABEL: define void @test_vld4q_f16(%struct.float16x8x4_t* noalias sret %agg.result, half* %a) #0 { +// CHECK-LABEL: @test_vld4q_f16( // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> float16x8x4_t test_vld4q_f16(float16_t const * a) { return vld4q_f16(a); } -// CHECK-LABEL: define void @test_vld4q_f32(%struct.float32x4x4_t* noalias sret %agg.result, float* %a) #0 { +// CHECK-LABEL: @test_vld4q_f32( // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4.v4f32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_V]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> float32x4x4_t test_vld4q_f32(float32_t const * a) { return vld4q_f32(a); } -// CHECK-LABEL: define void @test_vld4q_p8(%struct.poly8x16x4_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld4q_p8( // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* -// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* -// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4Q_V]], { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> poly8x16x4_t test_vld4q_p8(poly8_t const * a) { return vld4q_p8(a); } -// CHECK-LABEL: define void @test_vld4q_p16(%struct.poly16x8x4_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld4q_p16( // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4.v8i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> poly16x8x4_t test_vld4q_p16(poly16_t const * a) { return vld4q_p16(a); } -// CHECK-LABEL: define void @test_vld4_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld4_u8( // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> uint8x8x4_t test_vld4_u8(uint8_t const * a) { return vld4_u8(a); } -// CHECK-LABEL: define void @test_vld4_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld4_u16( // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> uint16x4x4_t test_vld4_u16(uint16_t const * a) { return vld4_u16(a); } -// CHECK-LABEL: define void @test_vld4_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld4_u32( // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> uint32x2x4_t test_vld4_u32(uint32_t const * a) { return vld4_u32(a); } -// CHECK-LABEL: define void @test_vld4_u64(%struct.uint64x1x4_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld4_u64( // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> uint64x1x4_t test_vld4_u64(uint64_t const * a) { return vld4_u64(a); } -// CHECK-LABEL: define void @test_vld4_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld4_s8( // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> int8x8x4_t test_vld4_s8(int8_t const * a) { return vld4_s8(a); } -// CHECK-LABEL: define void @test_vld4_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld4_s16( // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> int16x4x4_t test_vld4_s16(int16_t const * a) { return vld4_s16(a); } -// CHECK-LABEL: define void @test_vld4_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld4_s32( // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4.v2i32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> int32x2x4_t test_vld4_s32(int32_t const * a) { return vld4_s32(a); } -// CHECK-LABEL: define void @test_vld4_s64(%struct.int64x1x4_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld4_s64( // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4_V]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> int64x1x4_t test_vld4_s64(int64_t const * a) { return vld4_s64(a); } -// CHECK-LABEL: define void @test_vld4_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a) #0 { +// CHECK-LABEL: @test_vld4_f16( // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> float16x4x4_t test_vld4_f16(float16_t const * a) { return vld4_f16(a); } -// CHECK-LABEL: define void @test_vld4_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a) #0 { +// CHECK-LABEL: @test_vld4_f32( // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4.v2f32.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_V]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> float32x2x4_t test_vld4_f32(float32_t const * a) { return vld4_f32(a); } -// CHECK-LABEL: define void @test_vld4_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld4_p8( // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4.v8i8.p0i8(i8* %a, i32 1) -// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP1]] -// CHECK: [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8* -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP2]], i8* [[TMP3]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> poly8x8x4_t test_vld4_p8(poly8_t const * a) { return vld4_p8(a); } -// CHECK-LABEL: define void @test_vld4_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld4_p16( // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4.v4i16.p0i8(i8* [[TMP1]], i32 2) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> poly16x4x4_t test_vld4_p16(poly16_t const * a) { return vld4_p16(a); } - -// CHECK-LABEL: define void @test_vld4_dup_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_u8( // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) -// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer -// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 -// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer -// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 -// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer -// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2 -// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3 -// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer -// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3 -// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]] -// CHECK: [[TMP10:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8* -// CHECK: [[TMP11:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> uint8x8x4_t test_vld4_dup_u8(uint8_t const * a) { return vld4_dup_u8(a); } -// CHECK-LABEL: define void @test_vld4_dup_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_u16( // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3 -// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer -// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3 -// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]] -// CHECK: [[TMP11:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8* -// CHECK: [[TMP12:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> uint16x4x4_t test_vld4_dup_u16(uint16_t const * a) { return vld4_dup_u16(a); } -// CHECK-LABEL: define void @test_vld4_dup_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_u32( // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) -// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], 3 -// CHECK: [[LANE3:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP8]], <2 x i32> zeroinitializer -// CHECK: [[TMP9:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], <2 x i32> [[LANE3]], 3 -// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP9]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP10]] -// CHECK: [[TMP11:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8* -// CHECK: [[TMP12:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> uint32x2x4_t test_vld4_dup_u32(uint32_t const * a) { return vld4_dup_u32(a); } -// CHECK-LABEL: define void @test_vld4_dup_u64(%struct.uint64x1x4_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_u64( // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.uint64x1x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> uint64x1x4_t test_vld4_dup_u64(uint64_t const * a) { return vld4_dup_u64(a); } -// CHECK-LABEL: define void @test_vld4_dup_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_s8( // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) -// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer -// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 -// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer -// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 -// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer -// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2 -// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3 -// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer -// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3 -// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]] -// CHECK: [[TMP10:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8* -// CHECK: [[TMP11:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> int8x8x4_t test_vld4_dup_s8(int8_t const * a) { return vld4_dup_s8(a); } -// CHECK-LABEL: define void @test_vld4_dup_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_s16( // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3 -// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer -// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3 -// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]] -// CHECK: [[TMP11:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8* -// CHECK: [[TMP12:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> int16x4x4_t test_vld4_dup_s16(int16_t const * a) { return vld4_dup_s16(a); } -// CHECK-LABEL: define void @test_vld4_dup_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_s32( // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP1]], <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 4) -// CHECK: [[TMP2:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP2]], <2 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD_DUP]], <2 x i32> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP4]], <2 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP3]], <2 x i32> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> [[TMP6]], <2 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP5]], <2 x i32> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], 3 -// CHECK: [[LANE3:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP8]], <2 x i32> zeroinitializer -// CHECK: [[TMP9:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP7]], <2 x i32> [[LANE3]], 3 -// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP9]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP10]] -// CHECK: [[TMP11:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8* -// CHECK: [[TMP12:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> int32x2x4_t test_vld4_dup_s32(int32_t const * a) { return vld4_dup_s32(a); } -// CHECK-LABEL: define void @test_vld4_dup_s64(%struct.int64x1x4_t* noalias sret %agg.result, i64* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_s64( // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm.neon.vld4.v1i64.p0i8(i8* [[TMP1]], i32 4) -// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* -// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD_DUP]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP2]] -// CHECK: [[TMP3:%.*]] = bitcast %struct.int64x1x4_t* %agg.result to i8* -// CHECK: [[TMP4:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> int64x1x4_t test_vld4_dup_s64(int64_t const * a) { return vld4_dup_s64(a); } -// CHECK-LABEL: define void @test_vld4_dup_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_f16( // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3 -// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer -// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3 -// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]] -// CHECK: [[TMP11:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8* -// CHECK: [[TMP12:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> float16x4x4_t test_vld4_dup_f16(float16_t const * a) { return vld4_dup_f16(a); } -// CHECK-LABEL: define void @test_vld4_dup_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_f32( // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* [[TMP1]], <2 x float> undef, <2 x float> undef, <2 x float> undef, <2 x float> undef, i32 0, i32 4) -// CHECK: [[TMP2:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP2]], <2 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD_DUP]], <2 x float> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP4]], <2 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP3]], <2 x float> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> [[TMP6]], <2 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP5]], <2 x float> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP7]], 3 -// CHECK: [[LANE3:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP8]], <2 x i32> zeroinitializer -// CHECK: [[TMP9:%.*]] = insertvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP7]], <2 x float> [[LANE3]], 3 -// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP9]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP10]] -// CHECK: [[TMP11:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8* -// CHECK: [[TMP12:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> float32x2x4_t test_vld4_dup_f32(float32_t const * a) { return vld4_dup_f32(a); } -// CHECK-LABEL: define void @test_vld4_dup_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_p8( // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) -// CHECK: [[TMP1:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer -// CHECK: [[TMP2:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD_DUP]], <8 x i8> [[LANE]], 0 -// CHECK: [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP3]], <8 x i32> zeroinitializer -// CHECK: [[TMP4:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP2]], <8 x i8> [[LANE1]], 1 -// CHECK: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> [[TMP5]], <8 x i32> zeroinitializer -// CHECK: [[TMP6:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], <8 x i8> [[LANE2]], 2 -// CHECK: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], 3 -// CHECK: [[LANE3:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP7]], <8 x i32> zeroinitializer -// CHECK: [[TMP8:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP6]], <8 x i8> [[LANE3]], 3 -// CHECK: [[TMP9:%.*]] = bitcast i8* [[TMP0]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP8]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP9]] -// CHECK: [[TMP10:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8* -// CHECK: [[TMP11:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP10]], i8* [[TMP11]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> poly8x8x4_t test_vld4_dup_p8(poly8_t const * a) { return vld4_dup_p8(a); } -// CHECK-LABEL: define void @test_vld4_dup_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a) #0 { +// CHECK-LABEL: @test_vld4_dup_p16( // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8* -// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP1]], <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) -// CHECK: [[TMP2:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], 0 -// CHECK: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> zeroinitializer -// CHECK: [[TMP3:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD_DUP]], <4 x i16> [[LANE]], 0 -// CHECK: [[TMP4:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], 1 -// CHECK: [[LANE1:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP4]], <4 x i32> zeroinitializer -// CHECK: [[TMP5:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP3]], <4 x i16> [[LANE1]], 1 -// CHECK: [[TMP6:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], 2 -// CHECK: [[LANE2:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP6]], <4 x i32> zeroinitializer -// CHECK: [[TMP7:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP5]], <4 x i16> [[LANE2]], 2 -// CHECK: [[TMP8:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], 3 -// CHECK: [[LANE3:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP8]], <4 x i32> zeroinitializer -// CHECK: [[TMP9:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP7]], <4 x i16> [[LANE3]], 3 -// CHECK: [[TMP10:%.*]] = bitcast i8* [[TMP0]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP9]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP10]] -// CHECK: [[TMP11:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8* -// CHECK: [[TMP12:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP11]], i8* [[TMP12]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD_DUP:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> poly16x4x4_t test_vld4_dup_p16(poly16_t const * a) { return vld4_dup_p16(a); } - -// CHECK-LABEL: define void @test_vld4q_lane_u16(%struct.uint16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4q_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16 @@ -7814,18 +6431,12 @@ poly16x4x4_t test_vld4_dup_p16(poly16_t const * a) { // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16> -// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x8x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) { return vld4q_lane_u16(a, b, 7); } -// CHECK-LABEL: define void @test_vld4q_lane_u32(%struct.uint32x4x4_t* noalias sret %agg.result, i32* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4q_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 @@ -7857,18 +6468,12 @@ uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32> -// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], i32 3, i32 4) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x4x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) { return vld4q_lane_u32(a, b, 3); } -// CHECK-LABEL: define void @test_vld4q_lane_s16(%struct.int16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4q_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 @@ -7900,18 +6505,12 @@ uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16> -// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.int16x8x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) { return vld4q_lane_s16(a, b, 7); } -// CHECK-LABEL: define void @test_vld4q_lane_s32(%struct.int32x4x4_t* noalias sret %agg.result, i32* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4q_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16 @@ -7943,18 +6542,12 @@ int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32> -// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm.neon.vld4lane.v4i32.p0i8(i8* [[TMP4]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], i32 3, i32 4) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* -// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4Q_LANE_V]], { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.int32x4x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) { return vld4q_lane_s32(a, b, 3); } -// CHECK-LABEL: define void @test_vld4q_lane_f16(%struct.float16x8x4_t* noalias sret %agg.result, half* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4q_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16 @@ -7986,18 +6579,12 @@ int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16> -// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.float16x8x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) { return vld4q_lane_f16(a, b, 7); } -// CHECK-LABEL: define void @test_vld4q_lane_f32(%struct.float32x4x4_t* noalias sret %agg.result, float* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4q_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16 @@ -8029,18 +6616,12 @@ float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float> // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x float> -// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld4lane.v4f32.p0i8(i8* [[TMP4]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], <4 x float> [[TMP16]], i32 3, i32 4) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* -// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4Q_LANE_V]], { <4 x float>, <4 x float>, <4 x float>, <4 x float> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.float32x4x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) { return vld4q_lane_f32(a, b, 3); } -// CHECK-LABEL: define void @test_vld4q_lane_p16(%struct.poly16x8x4_t* noalias sret %agg.result, i16* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4q_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16 @@ -8072,18 +6653,12 @@ float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> // CHECK: [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> // CHECK: [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16> -// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld4lane.v8i16.p0i8(i8* [[TMP4]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], <8 x i16> [[TMP16]], i32 7, i32 2) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* -// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4Q_LANE_V]], { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x8x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 64, i32 16, i1 false) -// CHECK: ret void +// CHECK: [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) { return vld4q_lane_p16(a, b, 7); } -// CHECK-LABEL: define void @test_vld4_lane_u8(%struct.uint8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4_lane_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8 @@ -8106,18 +6681,12 @@ poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) { // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 -// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1) -// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]] -// CHECK: [[TMP9:%.*]] = bitcast %struct.uint8x8x4_t* %agg.result to i8* -// CHECK: [[TMP10:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) { return vld4_lane_u8(a, b, 7); } -// CHECK-LABEL: define void @test_vld4_lane_u16(%struct.uint16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8 @@ -8149,18 +6718,12 @@ uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16> -// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.uint16x4x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) { return vld4_lane_u16(a, b, 3); } -// CHECK-LABEL: define void @test_vld4_lane_u32(%struct.uint32x2x4_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8 @@ -8192,18 +6755,12 @@ uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32> -// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> [[TMP16]], i32 1, i32 4) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.uint32x2x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) { return vld4_lane_u32(a, b, 1); } -// CHECK-LABEL: define void @test_vld4_lane_s8(%struct.int8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4_lane_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8 @@ -8226,18 +6783,12 @@ uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) { // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 -// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1) -// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]] -// CHECK: [[TMP9:%.*]] = bitcast %struct.int8x8x4_t* %agg.result to i8* -// CHECK: [[TMP10:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) { return vld4_lane_s8(a, b, 7); } -// CHECK-LABEL: define void @test_vld4_lane_s16(%struct.int16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8 @@ -8269,18 +6820,12 @@ int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16> -// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.int16x4x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) { return vld4_lane_s16(a, b, 3); } -// CHECK-LABEL: define void @test_vld4_lane_s32(%struct.int32x2x4_t* noalias sret %agg.result, i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8 @@ -8312,18 +6857,12 @@ int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32> // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32> -// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* [[TMP4]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <2 x i32> [[TMP16]], i32 1, i32 4) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* -// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4_LANE_V]], { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.int32x2x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) { return vld4_lane_s32(a, b, 1); } -// CHECK-LABEL: define void @test_vld4_lane_f16(%struct.float16x4x4_t* noalias sret %agg.result, half* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8 @@ -8355,18 +6894,12 @@ int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16> -// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.float16x4x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) { return vld4_lane_f16(a, b, 3); } -// CHECK-LABEL: define void @test_vld4_lane_f32(%struct.float32x2x4_t* noalias sret %agg.result, float* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8 @@ -8398,18 +6931,12 @@ float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float> // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x float> -// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm.neon.vld4lane.v2f32.p0i8(i8* [[TMP4]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], <2 x float> [[TMP16]], i32 1, i32 4) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* -// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4_LANE_V]], { <2 x float>, <2 x float>, <2 x float>, <2 x float> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.float32x2x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) { return vld4_lane_f32(a, b, 1); } -// CHECK-LABEL: define void @test_vld4_lane_p8(%struct.poly8x8x4_t* noalias sret %agg.result, i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4_lane_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8 @@ -8432,18 +6959,12 @@ float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) { // CHECK: [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3 // CHECK: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8 -// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld4lane.v8i8.p0i8(i8* %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], i32 7, i32 1) -// CHECK: [[TMP8:%.*]] = bitcast i8* [[TMP3]] to { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* -// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE_V]], { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }* [[TMP8]] -// CHECK: [[TMP9:%.*]] = bitcast %struct.poly8x8x4_t* %agg.result to i8* -// CHECK: [[TMP10:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP9]], i8* [[TMP10]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) { return vld4_lane_p8(a, b, 7); } -// CHECK-LABEL: define void @test_vld4_lane_p16(%struct.poly16x4x4_t* noalias sret %agg.result, i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vld4_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8 @@ -8475,337 +6996,268 @@ poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) { // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> // CHECK: [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16> // CHECK: [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16> -// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* [[TMP4]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], <4 x i16> [[TMP16]], i32 3, i32 2) -// CHECK: [[TMP17:%.*]] = bitcast i8* [[TMP3]] to { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* -// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4_LANE_V]], { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }* [[TMP17]] -// CHECK: [[TMP18:%.*]] = bitcast %struct.poly16x4x4_t* %agg.result to i8* -// CHECK: [[TMP19:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP18]], i8* [[TMP19]], i32 32, i32 8, i1 false) -// CHECK: ret void +// CHECK: [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) { return vld4_lane_p16(a, b, 3); } - -// CHECK-LABEL: define <8 x i8> @test_vmax_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmax_s8( // CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VMAX_V_I]] int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { return vmax_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vmax_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmax_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> [[VMAX_V_I]], <4 x i16> [[VMAX_V1_I]]) #4 +// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VMAX_V2_I]] int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { return vmax_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vmax_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmax_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> [[VMAX_V_I]], <2 x i32> [[VMAX_V1_I]]) #4 +// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VMAX_V2_I]] int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { return vmax_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vmax_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmax_u8( // CHECK: [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VMAX_V_I]] uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { return vmax_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vmax_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmax_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> [[VMAX_V_I]], <4 x i16> [[VMAX_V1_I]]) #4 +// CHECK: [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VMAX_V2_I]] uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { return vmax_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vmax_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmax_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> [[VMAX_V_I]], <2 x i32> [[VMAX_V1_I]]) #4 +// CHECK: [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VMAX_V2_I]] uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { return vmax_u32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vmax_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vmax_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> [[VMAX_V_I]], <2 x float> [[VMAX_V1_I]]) #4 +// CHECK: [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMAX_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: ret <2 x float> [[VMAX_V2_I]] float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { return vmax_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vmaxq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vmaxq_s8( // CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VMAXQ_V_I]] int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { return vmaxq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmaxq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vmaxq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> [[VMAXQ_V_I]], <8 x i16> [[VMAXQ_V1_I]]) #4 +// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VMAXQ_V2_I]] int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { return vmaxq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmaxq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vmaxq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> [[VMAXQ_V_I]], <4 x i32> [[VMAXQ_V1_I]]) #4 +// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VMAXQ_V2_I]] int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { return vmaxq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vmaxq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vmaxq_u8( // CHECK: [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VMAXQ_V_I]] uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { return vmaxq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmaxq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vmaxq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> [[VMAXQ_V_I]], <8 x i16> [[VMAXQ_V1_I]]) #4 +// CHECK: [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VMAXQ_V2_I]] uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { return vmaxq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmaxq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vmaxq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> [[VMAXQ_V_I]], <4 x i32> [[VMAXQ_V1_I]]) #4 +// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VMAXQ_V2_I]] uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { return vmaxq_u32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vmaxq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vmaxq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMAXQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VMAXQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> [[VMAXQ_V_I]], <4 x float> [[VMAXQ_V1_I]]) #4 +// CHECK: [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMAXQ_V3_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP2]] +// CHECK: ret <4 x float> [[VMAXQ_V2_I]] float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { return vmaxq_f32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vmin_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmin_s8( // CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VMIN_V_I]] int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { return vmin_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vmin_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmin_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> [[VMIN_V_I]], <4 x i16> [[VMIN_V1_I]]) #4 +// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VMIN_V2_I]] int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { return vmin_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vmin_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmin_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> [[VMIN_V_I]], <2 x i32> [[VMIN_V1_I]]) #4 +// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VMIN_V2_I]] int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { return vmin_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vmin_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmin_u8( // CHECK: [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VMIN_V_I]] uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { return vmin_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vmin_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmin_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> [[VMIN_V_I]], <4 x i16> [[VMIN_V1_I]]) #4 +// CHECK: [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VMIN_V2_I]] uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { return vmin_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vmin_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmin_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> [[VMIN_V_I]], <2 x i32> [[VMIN_V1_I]]) #4 +// CHECK: [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VMIN_V2_I]] uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { return vmin_u32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vmin_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vmin_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> [[VMIN_V_I]], <2 x float> [[VMIN_V1_I]]) #4 +// CHECK: [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VMIN_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: ret <2 x float> [[VMIN_V2_I]] float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { return vmin_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vminq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vminq_s8( // CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VMINQ_V_I]] int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { return vminq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vminq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vminq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> [[VMINQ_V_I]], <8 x i16> [[VMINQ_V1_I]]) #4 +// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VMINQ_V2_I]] int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { return vminq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vminq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vminq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> [[VMINQ_V_I]], <4 x i32> [[VMINQ_V1_I]]) #4 +// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VMINQ_V2_I]] int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { return vminq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vminq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vminq_u8( // CHECK: [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VMINQ_V_I]] uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { return vminq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vminq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vminq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> [[VMINQ_V_I]], <8 x i16> [[VMINQ_V1_I]]) #4 +// CHECK: [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VMINQ_V2_I]] uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { return vminq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vminq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vminq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> [[VMINQ_V_I]], <4 x i32> [[VMINQ_V1_I]]) #4 +// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VMINQ_V2_I]] uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { return vminq_u32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vminq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vminq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VMINQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VMINQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> [[VMINQ_V_I]], <4 x float> [[VMINQ_V1_I]]) #4 +// CHECK: [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VMINQ_V3_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP2]] +// CHECK: ret <4 x float> [[VMINQ_V2_I]] float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) { return vminq_f32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vmla_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vmla_s8( // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]] // CHECK: ret <8 x i8> [[ADD_I]] @@ -8813,7 +7265,7 @@ int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) { return vmla_s8(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vmla_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmla_s16( // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]] // CHECK: ret <4 x i16> [[ADD_I]] @@ -8821,7 +7273,7 @@ int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) { return vmla_s16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vmla_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmla_s32( // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]] // CHECK: ret <2 x i32> [[ADD_I]] @@ -8829,7 +7281,7 @@ int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) { return vmla_s32(a, b, c); } -// CHECK-LABEL: define <2 x float> @test_vmla_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { +// CHECK-LABEL: @test_vmla_f32( // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]] // CHECK: ret <2 x float> [[ADD_I]] @@ -8837,7 +7289,7 @@ float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) { return vmla_f32(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vmla_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vmla_u8( // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c // CHECK: [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]] // CHECK: ret <8 x i8> [[ADD_I]] @@ -8845,7 +7297,7 @@ uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { return vmla_u8(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vmla_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmla_u16( // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c // CHECK: [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]] // CHECK: ret <4 x i16> [[ADD_I]] @@ -8853,7 +7305,7 @@ uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { return vmla_u16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vmla_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmla_u32( // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c // CHECK: [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]] // CHECK: ret <2 x i32> [[ADD_I]] @@ -8861,7 +7313,7 @@ uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { return vmla_u32(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vmlaq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlaq_s8( // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]] // CHECK: ret <16 x i8> [[ADD_I]] @@ -8869,7 +7321,7 @@ int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) { return vmlaq_s8(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlaq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlaq_s16( // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -8877,7 +7329,7 @@ int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { return vmlaq_s16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlaq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlaq_s32( // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]] // CHECK: ret <4 x i32> [[ADD_I]] @@ -8885,7 +7337,7 @@ int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { return vmlaq_s32(a, b, c); } -// CHECK-LABEL: define <4 x float> @test_vmlaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +// CHECK-LABEL: @test_vmlaq_f32( // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]] // CHECK: ret <4 x float> [[ADD_I]] @@ -8893,7 +7345,7 @@ float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { return vmlaq_f32(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vmlaq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlaq_u8( // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c // CHECK: [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]] // CHECK: ret <16 x i8> [[ADD_I]] @@ -8901,7 +7353,7 @@ uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { return vmlaq_u8(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlaq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlaq_u16( // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -8909,7 +7361,7 @@ uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { return vmlaq_u16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlaq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlaq_u32( // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]] // CHECK: ret <4 x i32> [[ADD_I]] @@ -8917,8 +7369,7 @@ uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { return vmlaq_u32(a, b, c); } - -// CHECK-LABEL: define <8 x i16> @test_vmlal_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlal_s8( // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -8926,31 +7377,27 @@ int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { return vmlal_s8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlal_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vmlal_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlal_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vmlal_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlal_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlal_u8( // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[ADD_I]] @@ -8958,146 +7405,123 @@ uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { return vmlal_u8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlal_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { return vmlal_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlal_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { return vmlal_u32(a, b, c); } - -// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlal_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vmlal_lane_s16(a, b, c, 3); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlal_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vmlal_lane_s32(a, b, c, 1); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlal_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[ADD]] uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { return vmlal_lane_u16(a, b, c, 3); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlal_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 // CHECK: [[ADD:%.*]] = add <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[ADD]] uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { return vmlal_lane_u32(a, b, c, 1); } - -// CHECK-LABEL: define <4 x i32> @test_vmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 { +// CHECK-LABEL: @test_vmlal_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vmlal_n_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlal_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vmlal_n_s32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlal_n_u16(<4 x i32> %a, <4 x i16> %b, i16 zeroext %c) #0 { +// CHECK-LABEL: @test_vmlal_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[ADD_I]] uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { return vmlal_n_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlal_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlal_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[ADD_I]] uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { return vmlal_n_u32(a, b, c); } - -// CHECK-LABEL: define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmla_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] @@ -9106,7 +7530,7 @@ int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { return vmla_lane_s16(a, b, c, 3); } -// CHECK-LABEL: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmla_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] @@ -9115,7 +7539,7 @@ int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { return vmla_lane_s32(a, b, c, 1); } -// CHECK-LABEL: define <4 x i16> @test_vmla_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmla_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <4 x i16> %a, [[MUL]] @@ -9124,7 +7548,7 @@ uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { return vmla_lane_u16(a, b, c, 3); } -// CHECK-LABEL: define <2 x i32> @test_vmla_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmla_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <2 x i32> %a, [[MUL]] @@ -9133,7 +7557,7 @@ uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { return vmla_lane_u32(a, b, c, 1); } -// CHECK-LABEL: define <2 x float> @test_vmla_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { +// CHECK-LABEL: @test_vmla_lane_f32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <2 x i32> // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]] @@ -9142,7 +7566,7 @@ float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) { return vmla_lane_f32(a, b, c, 1); } -// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlaq_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] @@ -9151,7 +7575,7 @@ int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { return vmlaq_lane_s16(a, b, c, 3); } -// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlaq_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] @@ -9160,7 +7584,7 @@ int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { return vmlaq_lane_s32(a, b, c, 1); } -// CHECK-LABEL: define <8 x i16> @test_vmlaq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlaq_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <8 x i16> %a, [[MUL]] @@ -9169,7 +7593,7 @@ uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) { return vmlaq_lane_u16(a, b, c, 3); } -// CHECK-LABEL: define <4 x i32> @test_vmlaq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlaq_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = add <4 x i32> %a, [[MUL]] @@ -9178,7 +7602,7 @@ uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) { return vmlaq_lane_u32(a, b, c, 1); } -// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %c) #0 { +// CHECK-LABEL: @test_vmlaq_lane_f32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <4 x i32> // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] // CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]] @@ -9187,8 +7611,7 @@ float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) { return vmlaq_lane_f32(a, b, c, 1); } - -// CHECK-LABEL: define <4 x i16> @test_vmla_n_s16(<4 x i16> %a, <4 x i16> %b, i16 signext %c) #0 { +// CHECK-LABEL: @test_vmla_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 @@ -9200,7 +7623,7 @@ int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) { return vmla_n_s16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vmla_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmla_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] @@ -9210,7 +7633,7 @@ int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) { return vmla_n_s32(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vmla_n_u16(<4 x i16> %a, <4 x i16> %b, i16 zeroext %c) #0 { +// CHECK-LABEL: @test_vmla_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 @@ -9222,7 +7645,7 @@ uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { return vmla_n_u16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vmla_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmla_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] @@ -9232,7 +7655,7 @@ uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { return vmla_n_u32(a, b, c); } -// CHECK-LABEL: define <2 x float> @test_vmla_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 { +// CHECK-LABEL: @test_vmla_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]] @@ -9242,7 +7665,7 @@ float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) { return vmla_n_f32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) #0 { +// CHECK-LABEL: @test_vmlaq_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 @@ -9258,7 +7681,7 @@ int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { return vmlaq_n_s16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlaq_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 @@ -9270,7 +7693,7 @@ int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { return vmlaq_n_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) #0 { +// CHECK-LABEL: @test_vmlaq_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 @@ -9286,7 +7709,7 @@ uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { return vmlaq_n_u16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlaq_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 @@ -9298,7 +7721,7 @@ uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { return vmlaq_n_u32(a, b, c); } -// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 { +// CHECK-LABEL: @test_vmlaq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2 @@ -9310,8 +7733,7 @@ float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { return vmlaq_n_f32(a, b, c); } - -// CHECK-LABEL: define <8 x i8> @test_vmls_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vmls_s8( // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]] // CHECK: ret <8 x i8> [[SUB_I]] @@ -9319,7 +7741,7 @@ int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) { return vmls_s8(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vmls_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmls_s16( // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]] // CHECK: ret <4 x i16> [[SUB_I]] @@ -9327,7 +7749,7 @@ int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) { return vmls_s16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vmls_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmls_s32( // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]] // CHECK: ret <2 x i32> [[SUB_I]] @@ -9335,7 +7757,7 @@ int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) { return vmls_s32(a, b, c); } -// CHECK-LABEL: define <2 x float> @test_vmls_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { +// CHECK-LABEL: @test_vmls_f32( // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, %c // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]] // CHECK: ret <2 x float> [[SUB_I]] @@ -9343,7 +7765,7 @@ float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) { return vmls_f32(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vmls_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vmls_u8( // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %b, %c // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]] // CHECK: ret <8 x i8> [[SUB_I]] @@ -9351,7 +7773,7 @@ uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { return vmls_u8(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vmls_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmls_u16( // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %b, %c // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]] // CHECK: ret <4 x i16> [[SUB_I]] @@ -9359,7 +7781,7 @@ uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { return vmls_u16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vmls_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmls_u32( // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, %c // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]] // CHECK: ret <2 x i32> [[SUB_I]] @@ -9367,7 +7789,7 @@ uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { return vmls_u32(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vmlsq_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlsq_s8( // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]] // CHECK: ret <16 x i8> [[SUB_I]] @@ -9375,7 +7797,7 @@ int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) { return vmlsq_s8(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlsq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlsq_s16( // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]] // CHECK: ret <8 x i16> [[SUB_I]] @@ -9383,7 +7805,7 @@ int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) { return vmlsq_s16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlsq_s32( // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]] // CHECK: ret <4 x i32> [[SUB_I]] @@ -9391,7 +7813,7 @@ int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) { return vmlsq_s32(a, b, c); } -// CHECK-LABEL: define <4 x float> @test_vmlsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +// CHECK-LABEL: @test_vmlsq_f32( // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %b, %c // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]] // CHECK: ret <4 x float> [[SUB_I]] @@ -9399,7 +7821,7 @@ float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { return vmlsq_f32(a, b, c); } -// CHECK-LABEL: define <16 x i8> @test_vmlsq_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlsq_u8( // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %b, %c // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]] // CHECK: ret <16 x i8> [[SUB_I]] @@ -9407,7 +7829,7 @@ uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) { return vmlsq_u8(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlsq_u16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlsq_u16( // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %b, %c // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]] // CHECK: ret <8 x i16> [[SUB_I]] @@ -9415,7 +7837,7 @@ uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) { return vmlsq_u16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsq_u32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlsq_u32( // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %b, %c // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]] // CHECK: ret <4 x i32> [[SUB_I]] @@ -9423,8 +7845,7 @@ uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) { return vmlsq_u32(a, b, c); } - -// CHECK-LABEL: define <8 x i16> @test_vmlsl_s8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlsl_s8( // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[SUB_I]] @@ -9432,31 +7853,27 @@ int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) { return vmlsl_s8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlsl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vmlsl_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlsl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vmlsl_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlsl_u8(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vmlsl_u8( // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]] // CHECK: ret <8 x i16> [[SUB_I]] @@ -9464,146 +7881,123 @@ uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) { return vmlsl_u8(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlsl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { return vmlsl_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlsl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c) #4 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { return vmlsl_u32(a, b, c); } - -// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlsl_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vmlsl_lane_s16(a, b, c, 3); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlsl_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vmlsl_lane_s32(a, b, c, 1); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlsl_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[VMULL2_I]] // CHECK: ret <4 x i32> [[SUB]] uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) { return vmlsl_lane_u16(a, b, c, 3); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlsl_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 // CHECK: [[SUB:%.*]] = sub <2 x i64> %a, [[VMULL2_I]] // CHECK: ret <2 x i64> [[SUB]] uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { return vmlsl_lane_u32(a, b, c, 1); } - -// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 { +// CHECK-LABEL: @test_vmlsl_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vmlsl_n_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlsl_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vmlsl_n_s32(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsl_n_u16(<4 x i32> %a, <4 x i16> %b, i16 zeroext %c) #0 { +// CHECK-LABEL: @test_vmlsl_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I_I]], <4 x i16> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { return vmlsl_n_u16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vmlsl_n_u32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlsl_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I_I]], <2 x i32> [[VMULL1_I_I]]) #4 +// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { return vmlsl_n_u32(a, b, c); } - -// CHECK-LABEL: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmls_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] @@ -9612,7 +8006,7 @@ int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) { return vmls_lane_s16(a, b, c, 3); } -// CHECK-LABEL: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmls_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] @@ -9621,7 +8015,7 @@ int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) { return vmls_lane_s32(a, b, c, 1); } -// CHECK-LABEL: define <4 x i16> @test_vmls_lane_u16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmls_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i16> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <4 x i16> %a, [[MUL]] @@ -9630,7 +8024,7 @@ uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) { return vmls_lane_u16(a, b, c, 3); } -// CHECK-LABEL: define <2 x i32> @test_vmls_lane_u32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmls_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[MUL:%.*]] = mul <2 x i32> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <2 x i32> %a, [[MUL]] @@ -9639,7 +8033,7 @@ uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) { return vmls_lane_u32(a, b, c, 1); } -// CHECK-LABEL: define <2 x float> @test_vmls_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { +// CHECK-LABEL: @test_vmls_lane_f32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <2 x i32> // CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]] @@ -9648,7 +8042,7 @@ float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) { return vmls_lane_f32(a, b, c, 1); } -// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlsq_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] @@ -9657,7 +8051,7 @@ int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) { return vmlsq_lane_s16(a, b, c, 3); } -// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlsq_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] @@ -9666,7 +8060,7 @@ int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) { return vmlsq_lane_s32(a, b, c, 1); } -// CHECK-LABEL: define <8 x i16> @test_vmlsq_lane_u16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vmlsq_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <8 x i32> // CHECK: [[MUL:%.*]] = mul <8 x i16> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <8 x i16> %a, [[MUL]] @@ -9675,7 +8069,7 @@ uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) { return vmlsq_lane_u16(a, b, c, 3); } -// CHECK-LABEL: define <4 x i32> @test_vmlsq_lane_u32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vmlsq_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i32> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = sub <4 x i32> %a, [[MUL]] @@ -9684,7 +8078,7 @@ uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) { return vmlsq_lane_u32(a, b, c, 1); } -// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %c) #0 { +// CHECK-LABEL: @test_vmlsq_lane_f32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %c, <2 x float> %c, <4 x i32> // CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]] // CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]] @@ -9693,8 +8087,7 @@ float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) { return vmlsq_lane_f32(a, b, c, 1); } - -// CHECK-LABEL: define <4 x i16> @test_vmls_n_s16(<4 x i16> %a, <4 x i16> %b, i16 signext %c) #0 { +// CHECK-LABEL: @test_vmls_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 @@ -9706,7 +8099,7 @@ int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) { return vmls_n_s16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vmls_n_s32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmls_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] @@ -9716,7 +8109,7 @@ int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) { return vmls_n_s32(a, b, c); } -// CHECK-LABEL: define <4 x i16> @test_vmls_n_u16(<4 x i16> %a, <4 x i16> %b, i16 zeroext %c) #0 { +// CHECK-LABEL: @test_vmls_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 @@ -9728,7 +8121,7 @@ uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { return vmls_n_u16(a, b, c); } -// CHECK-LABEL: define <2 x i32> @test_vmls_n_u32(<2 x i32> %a, <2 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmls_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]] @@ -9738,7 +8131,7 @@ uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { return vmls_n_u32(a, b, c); } -// CHECK-LABEL: define <2 x float> @test_vmls_n_f32(<2 x float> %a, <2 x float> %b, float %c) #0 { +// CHECK-LABEL: @test_vmls_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]] @@ -9748,7 +8141,7 @@ float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { return vmls_n_f32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) #0 { +// CHECK-LABEL: @test_vmlsq_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 @@ -9764,7 +8157,7 @@ int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { return vmlsq_n_s16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlsq_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 @@ -9776,7 +8169,7 @@ int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { return vmlsq_n_s32(a, b, c); } -// CHECK-LABEL: define <8 x i16> @test_vmlsq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) #0 { +// CHECK-LABEL: @test_vmlsq_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2 @@ -9792,7 +8185,7 @@ uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { return vmlsq_n_u16(a, b, c); } -// CHECK-LABEL: define <4 x i32> @test_vmlsq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vmlsq_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2 @@ -9804,7 +8197,7 @@ uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { return vmlsq_n_u32(a, b, c); } -// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 { +// CHECK-LABEL: @test_vmlsq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2 @@ -9816,114 +8209,101 @@ float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { return vmlsq_n_f32(a, b, c); } - -// CHECK-LABEL: define <8 x i16> @test_vmovl_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vmovl_s8( // CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I]] int16x8_t test_vmovl_s8(int8x8_t a) { return vmovl_s8(a); } -// CHECK-LABEL: define <4 x i32> @test_vmovl_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vmovl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I]] int32x4_t test_vmovl_s16(int16x4_t a) { return vmovl_s16(a); } -// CHECK-LABEL: define <2 x i64> @test_vmovl_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vmovl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I]] int64x2_t test_vmovl_s32(int32x2_t a) { return vmovl_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vmovl_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vmovl_u8( // CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[VMOVL_I]] uint16x8_t test_vmovl_u8(uint8x8_t a) { return vmovl_u8(a); } -// CHECK-LABEL: define <4 x i32> @test_vmovl_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vmovl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[VMOVL_I]] uint32x4_t test_vmovl_u16(uint16x4_t a) { return vmovl_u16(a); } -// CHECK-LABEL: define <2 x i64> @test_vmovl_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vmovl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[VMOVL_I]] uint64x2_t test_vmovl_u32(uint32x2_t a) { return vmovl_u32(a); } - -// CHECK-LABEL: define <8 x i8> @test_vmovn_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vmovn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8> +// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[VMOVN_I]] int8x8_t test_vmovn_s16(int16x8_t a) { return vmovn_s16(a); } -// CHECK-LABEL: define <4 x i16> @test_vmovn_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vmovn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16> +// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[VMOVN_I]] int16x4_t test_vmovn_s32(int32x4_t a) { return vmovn_s32(a); } -// CHECK-LABEL: define <2 x i32> @test_vmovn_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vmovn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> +// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[VMOVN_I]] int32x2_t test_vmovn_s64(int64x2_t a) { return vmovn_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vmovn_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vmovn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i8> +// CHECK: [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[VMOVN_I]] uint8x8_t test_vmovn_u16(uint16x8_t a) { return vmovn_u16(a); } -// CHECK-LABEL: define <4 x i16> @test_vmovn_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vmovn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i16> +// CHECK: [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[VMOVN_I]] uint16x4_t test_vmovn_u32(uint32x4_t a) { return vmovn_u32(a); } -// CHECK-LABEL: define <2 x i32> @test_vmovn_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vmovn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32> +// CHECK: [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[VMOVN_I]] uint32x2_t test_vmovn_u64(uint64x2_t a) { return vmovn_u64(a); } - -// CHECK-LABEL: define <8 x i8> @test_vmov_n_u8(i8 zeroext %a) #0 { +// CHECK-LABEL: @test_vmov_n_u8( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -9937,7 +8317,7 @@ uint8x8_t test_vmov_n_u8(uint8_t a) { return vmov_n_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vmov_n_u16(i16 zeroext %a) #0 { +// CHECK-LABEL: @test_vmov_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -9947,7 +8327,7 @@ uint16x4_t test_vmov_n_u16(uint16_t a) { return vmov_n_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vmov_n_u32(i32 %a) #0 { +// CHECK-LABEL: @test_vmov_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1 // CHECK: ret <2 x i32> [[VECINIT1_I]] @@ -9955,7 +8335,7 @@ uint32x2_t test_vmov_n_u32(uint32_t a) { return vmov_n_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vmov_n_s8(i8 signext %a) #0 { +// CHECK-LABEL: @test_vmov_n_s8( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -9969,7 +8349,7 @@ int8x8_t test_vmov_n_s8(int8_t a) { return vmov_n_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vmov_n_s16(i16 signext %a) #0 { +// CHECK-LABEL: @test_vmov_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -9979,7 +8359,7 @@ int16x4_t test_vmov_n_s16(int16_t a) { return vmov_n_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vmov_n_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vmov_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1 // CHECK: ret <2 x i32> [[VECINIT1_I]] @@ -9987,7 +8367,7 @@ int32x2_t test_vmov_n_s32(int32_t a) { return vmov_n_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vmov_n_p8(i8 signext %a) #0 { +// CHECK-LABEL: @test_vmov_n_p8( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -10001,7 +8381,7 @@ poly8x8_t test_vmov_n_p8(poly8_t a) { return vmov_n_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vmov_n_p16(i16 signext %a) #0 { +// CHECK-LABEL: @test_vmov_n_p16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -10011,7 +8391,7 @@ poly16x4_t test_vmov_n_p16(poly16_t a) { return vmov_n_p16(a); } -// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a) #0 { +// CHECK-LABEL: @test_vmov_n_f16( // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 // CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0 // CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1 @@ -10022,7 +8402,7 @@ float16x4_t test_vmov_n_f16(float16_t *a) { return vmov_n_f16(*a); } -// CHECK-LABEL: define <2 x float> @test_vmov_n_f32(float %a) #0 { +// CHECK-LABEL: @test_vmov_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1 // CHECK: ret <2 x float> [[VECINIT1_I]] @@ -10030,7 +8410,7 @@ float32x2_t test_vmov_n_f32(float32_t a) { return vmov_n_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vmovq_n_u8(i8 zeroext %a) #0 { +// CHECK-LABEL: @test_vmovq_n_u8( // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -10052,7 +8432,7 @@ uint8x16_t test_vmovq_n_u8(uint8_t a) { return vmovq_n_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vmovq_n_u16(i16 zeroext %a) #0 { +// CHECK-LABEL: @test_vmovq_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -10066,7 +8446,7 @@ uint16x8_t test_vmovq_n_u16(uint16_t a) { return vmovq_n_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vmovq_n_u32(i32 %a) #0 { +// CHECK-LABEL: @test_vmovq_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2 @@ -10076,7 +8456,7 @@ uint32x4_t test_vmovq_n_u32(uint32_t a) { return vmovq_n_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vmovq_n_s8(i8 signext %a) #0 { +// CHECK-LABEL: @test_vmovq_n_s8( // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -10098,7 +8478,7 @@ int8x16_t test_vmovq_n_s8(int8_t a) { return vmovq_n_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vmovq_n_s16(i16 signext %a) #0 { +// CHECK-LABEL: @test_vmovq_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -10112,7 +8492,7 @@ int16x8_t test_vmovq_n_s16(int16_t a) { return vmovq_n_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vmovq_n_s32(i32 %a) #0 { +// CHECK-LABEL: @test_vmovq_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2 @@ -10122,7 +8502,7 @@ int32x4_t test_vmovq_n_s32(int32_t a) { return vmovq_n_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vmovq_n_p8(i8 signext %a) #0 { +// CHECK-LABEL: @test_vmovq_n_p8( // CHECK: [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2 @@ -10144,7 +8524,7 @@ poly8x16_t test_vmovq_n_p8(poly8_t a) { return vmovq_n_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vmovq_n_p16(i16 signext %a) #0 { +// CHECK-LABEL: @test_vmovq_n_p16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2 @@ -10158,7 +8538,7 @@ poly16x8_t test_vmovq_n_p16(poly16_t a) { return vmovq_n_p16(a); } -// CHECK-LABEL: define <8 x half> @test_vmovq_n_f16(half* %a) #0 { +// CHECK-LABEL: @test_vmovq_n_f16( // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 // CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0 // CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1 @@ -10173,7 +8553,7 @@ float16x8_t test_vmovq_n_f16(float16_t *a) { return vmovq_n_f16(*a); } -// CHECK-LABEL: define <4 x float> @test_vmovq_n_f32(float %a) #0 { +// CHECK-LABEL: @test_vmovq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2 @@ -10183,7 +8563,7 @@ float32x4_t test_vmovq_n_f32(float32_t a) { return vmovq_n_f32(a); } -// CHECK-LABEL: define <1 x i64> @test_vmov_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vmov_n_s64( // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]] // CHECK: ret <1 x i64> [[ADD_I]] @@ -10192,7 +8572,7 @@ int64x1_t test_vmov_n_s64(int64_t a) { return vadd_s64(tmp, tmp); } -// CHECK-LABEL: define <1 x i64> @test_vmov_n_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vmov_n_u64( // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0 // CHECK: [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]] // CHECK: ret <1 x i64> [[ADD_I]] @@ -10201,7 +8581,7 @@ uint64x1_t test_vmov_n_u64(uint64_t a) { return vadd_u64(tmp, tmp); } -// CHECK-LABEL: define <2 x i64> @test_vmovq_n_s64(i64 %a) #0 { +// CHECK-LABEL: @test_vmovq_n_s64( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1 // CHECK: ret <2 x i64> [[VECINIT1_I]] @@ -10209,7 +8589,7 @@ int64x2_t test_vmovq_n_s64(int64_t a) { return vmovq_n_s64(a); } -// CHECK-LABEL: define <2 x i64> @test_vmovq_n_u64(i64 %a) #0 { +// CHECK-LABEL: @test_vmovq_n_u64( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1 // CHECK: ret <2 x i64> [[VECINIT1_I]] @@ -10217,294 +8597,264 @@ uint64x2_t test_vmovq_n_u64(uint64_t a) { return vmovq_n_u64(a); } - -// CHECK-LABEL: define <8 x i8> @test_vmul_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmul_s8( // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b // CHECK: ret <8 x i8> [[MUL_I]] int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) { return vmul_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vmul_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmul_s16( // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b // CHECK: ret <4 x i16> [[MUL_I]] int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) { return vmul_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vmul_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmul_s32( // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b // CHECK: ret <2 x i32> [[MUL_I]] int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) { return vmul_s32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vmul_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vmul_f32( // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, %b // CHECK: ret <2 x float> [[MUL_I]] float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) { return vmul_f32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vmul_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmul_u8( // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %a, %b // CHECK: ret <8 x i8> [[MUL_I]] uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) { return vmul_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vmul_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmul_u16( // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %a, %b // CHECK: ret <4 x i16> [[MUL_I]] uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) { return vmul_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vmul_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmul_u32( // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, %b // CHECK: ret <2 x i32> [[MUL_I]] uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) { return vmul_u32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vmulq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vmulq_s8( // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b // CHECK: ret <16 x i8> [[MUL_I]] int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) { return vmulq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmulq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vmulq_s16( // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b // CHECK: ret <8 x i16> [[MUL_I]] int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) { return vmulq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmulq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vmulq_s32( // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b // CHECK: ret <4 x i32> [[MUL_I]] int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) { return vmulq_s32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vmulq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vmulq_f32( // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %a, %b // CHECK: ret <4 x float> [[MUL_I]] float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) { return vmulq_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vmulq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vmulq_u8( // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %a, %b // CHECK: ret <16 x i8> [[MUL_I]] uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) { return vmulq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmulq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vmulq_u16( // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %a, %b // CHECK: ret <8 x i16> [[MUL_I]] uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) { return vmulq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmulq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vmulq_u32( // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %a, %b // CHECK: ret <4 x i32> [[MUL_I]] uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) { return vmulq_u32(a, b); } - -// CHECK-LABEL: define <8 x i16> @test_vmull_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmull_s8( // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i16> [[VMULL_I]] int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) { return vmull_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmull_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmull_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) { return vmull_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vmull_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmull_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) { return vmull_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmull_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmull_u8( // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i16> [[VMULL_I]] uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) { return vmull_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmull_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmull_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) { return vmull_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vmull_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmull_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) { return vmull_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmull_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmull_p8( // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i16> [[VMULL_I]] poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) { return vmull_p8(a, b); } - -// CHECK-LABEL: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmull_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #4 // CHECK: ret <4 x i32> [[VMULL2_I]] int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) { return vmull_lane_s16(a, b, 3); } -// CHECK-LABEL: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmull_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #4 // CHECK: ret <2 x i64> [[VMULL2_I]] int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) { return vmull_lane_s32(a, b, 1); } -// CHECK-LABEL: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmull_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #4 // CHECK: ret <4 x i32> [[VMULL2_I]] uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) { return vmull_lane_u16(a, b, 3); } -// CHECK-LABEL: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmull_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL1_I]]) #4 +// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #4 // CHECK: ret <2 x i64> [[VMULL2_I]] uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) { return vmull_lane_u32(a, b, 1); } - -// CHECK-LABEL: define <4 x i32> @test_vmull_n_s16(<4 x i16> %a, i16 signext %b) #0 { +// CHECK-LABEL: @test_vmull_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #4 +// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #4 // CHECK: ret <4 x i32> [[VMULL5_I]] int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { return vmull_n_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vmull_n_s32(<2 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vmull_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #4 +// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #4 // CHECK: ret <2 x i64> [[VMULL3_I]] int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { return vmull_n_s32(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmull_n_u16(<4 x i16> %a, i16 zeroext %b) #0 { +// CHECK-LABEL: @test_vmull_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMULL4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[VMULL_I]], <4 x i16> [[VMULL4_I]]) #4 +// CHECK: [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #4 // CHECK: ret <4 x i32> [[VMULL5_I]] uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) { return vmull_n_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vmull_n_u32(<2 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vmull_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VMULL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMULL2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[VMULL_I]], <2 x i32> [[VMULL2_I]]) #4 +// CHECK: [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #4 // CHECK: ret <2 x i64> [[VMULL3_I]] uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) { return vmull_n_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vmul_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vmul_p8( // CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VMUL_V_I]] poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) { return vmul_p8(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vmulq_p8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vmulq_p8( // CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VMULQ_V_I]] poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) { return vmulq_p8(a, b); } - -// CHECK-LABEL: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmul_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] // CHECK: ret <4 x i16> [[MUL]] @@ -10512,7 +8862,7 @@ int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) { return vmul_lane_s16(a, b, 3); } -// CHECK-LABEL: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmul_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] // CHECK: ret <2 x i32> [[MUL]] @@ -10520,7 +8870,7 @@ int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) { return vmul_lane_s32(a, b, 1); } -// CHECK-LABEL: define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vmul_lane_f32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %b, <2 x float> %b, <2 x i32> // CHECK: [[MUL:%.*]] = fmul <2 x float> %a, [[SHUFFLE]] // CHECK: ret <2 x float> [[MUL]] @@ -10528,7 +8878,7 @@ float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) { return vmul_lane_f32(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmul_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i16> %a, [[SHUFFLE]] // CHECK: ret <4 x i16> [[MUL]] @@ -10536,7 +8886,7 @@ uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) { return vmul_lane_u16(a, b, 3); } -// CHECK-LABEL: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmul_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[MUL:%.*]] = mul <2 x i32> %a, [[SHUFFLE]] // CHECK: ret <2 x i32> [[MUL]] @@ -10544,7 +8894,7 @@ uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) { return vmul_lane_u32(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmulq_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] // CHECK: ret <8 x i16> [[MUL]] @@ -10552,7 +8902,7 @@ int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) { return vmulq_lane_s16(a, b, 3); } -// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmulq_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] // CHECK: ret <4 x i32> [[MUL]] @@ -10560,7 +8910,7 @@ int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) { return vmulq_lane_s32(a, b, 1); } -// CHECK-LABEL: define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vmulq_lane_f32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %b, <2 x float> %b, <4 x i32> // CHECK: [[MUL:%.*]] = fmul <4 x float> %a, [[SHUFFLE]] // CHECK: ret <4 x float> [[MUL]] @@ -10568,7 +8918,7 @@ float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) { return vmulq_lane_f32(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vmulq_lane_u16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> // CHECK: [[MUL:%.*]] = mul <8 x i16> %a, [[SHUFFLE]] // CHECK: ret <8 x i16> [[MUL]] @@ -10576,7 +8926,7 @@ uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) { return vmulq_lane_u16(a, b, 3); } -// CHECK-LABEL: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vmulq_lane_u32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> // CHECK: [[MUL:%.*]] = mul <4 x i32> %a, [[SHUFFLE]] // CHECK: ret <4 x i32> [[MUL]] @@ -10584,8 +8934,7 @@ uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) { return vmulq_lane_u32(a, b, 1); } - -// CHECK-LABEL: define <4 x i16> @test_vmul_n_s16(<4 x i16> %a, i16 signext %b) #0 { +// CHECK-LABEL: @test_vmul_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 @@ -10596,7 +8945,7 @@ int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) { return vmul_n_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vmul_n_s32(<2 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vmul_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]] @@ -10605,7 +8954,7 @@ int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) { return vmul_n_s32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 { +// CHECK-LABEL: @test_vmul_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]] @@ -10614,7 +8963,7 @@ float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) { return vmul_n_f32(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vmul_n_u16(<4 x i16> %a, i16 zeroext %b) #0 { +// CHECK-LABEL: @test_vmul_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 @@ -10625,7 +8974,7 @@ uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) { return vmul_n_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vmul_n_u32(<2 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vmul_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]] @@ -10634,7 +8983,7 @@ uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) { return vmul_n_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmulq_n_s16(<8 x i16> %a, i16 signext %b) #0 { +// CHECK-LABEL: @test_vmulq_n_s16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 @@ -10649,7 +8998,7 @@ int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) { return vmulq_n_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmulq_n_s32(<4 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vmulq_n_s32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 @@ -10660,7 +9009,7 @@ int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) { return vmulq_n_s32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 { +// CHECK-LABEL: @test_vmulq_n_f32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2 @@ -10671,7 +9020,7 @@ float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) { return vmulq_n_f32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vmulq_n_u16(<8 x i16> %a, i16 zeroext %b) #0 { +// CHECK-LABEL: @test_vmulq_n_u16( // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2 @@ -10686,7 +9035,7 @@ uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) { return vmulq_n_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vmulq_n_u32(<4 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vmulq_n_u32( // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 @@ -10697,164 +9046,161 @@ uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) { return vmulq_n_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vmvn_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvn_s8( // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, // CHECK: ret <8 x i8> [[NEG_I]] int8x8_t test_vmvn_s8(int8x8_t a) { return vmvn_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vmvn_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vmvn_s16( // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, // CHECK: ret <4 x i16> [[NEG_I]] int16x4_t test_vmvn_s16(int16x4_t a) { return vmvn_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vmvn_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vmvn_s32( // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, // CHECK: ret <2 x i32> [[NEG_I]] int32x2_t test_vmvn_s32(int32x2_t a) { return vmvn_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vmvn_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvn_u8( // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, // CHECK: ret <8 x i8> [[NEG_I]] uint8x8_t test_vmvn_u8(uint8x8_t a) { return vmvn_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vmvn_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vmvn_u16( // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %a, // CHECK: ret <4 x i16> [[NEG_I]] uint16x4_t test_vmvn_u16(uint16x4_t a) { return vmvn_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vmvn_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vmvn_u32( // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %a, // CHECK: ret <2 x i32> [[NEG_I]] uint32x2_t test_vmvn_u32(uint32x2_t a) { return vmvn_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vmvn_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvn_p8( // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %a, // CHECK: ret <8 x i8> [[NEG_I]] poly8x8_t test_vmvn_p8(poly8x8_t a) { return vmvn_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vmvnq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvnq_s8( // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, // CHECK: ret <16 x i8> [[NEG_I]] int8x16_t test_vmvnq_s8(int8x16_t a) { return vmvnq_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vmvnq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vmvnq_s16( // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, // CHECK: ret <8 x i16> [[NEG_I]] int16x8_t test_vmvnq_s16(int16x8_t a) { return vmvnq_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vmvnq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vmvnq_s32( // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, // CHECK: ret <4 x i32> [[NEG_I]] int32x4_t test_vmvnq_s32(int32x4_t a) { return vmvnq_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vmvnq_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvnq_u8( // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, // CHECK: ret <16 x i8> [[NEG_I]] uint8x16_t test_vmvnq_u8(uint8x16_t a) { return vmvnq_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vmvnq_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vmvnq_u16( // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %a, // CHECK: ret <8 x i16> [[NEG_I]] uint16x8_t test_vmvnq_u16(uint16x8_t a) { return vmvnq_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vmvnq_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vmvnq_u32( // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %a, // CHECK: ret <4 x i32> [[NEG_I]] uint32x4_t test_vmvnq_u32(uint32x4_t a) { return vmvnq_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vmvnq_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vmvnq_p8( // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %a, // CHECK: ret <16 x i8> [[NEG_I]] poly8x16_t test_vmvnq_p8(poly8x16_t a) { return vmvnq_p8(a); } - -// CHECK-LABEL: define <8 x i8> @test_vneg_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vneg_s8( // CHECK: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a // CHECK: ret <8 x i8> [[SUB_I]] int8x8_t test_vneg_s8(int8x8_t a) { return vneg_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vneg_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vneg_s16( // CHECK: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a // CHECK: ret <4 x i16> [[SUB_I]] int16x4_t test_vneg_s16(int16x4_t a) { return vneg_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vneg_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vneg_s32( // CHECK: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a // CHECK: ret <2 x i32> [[SUB_I]] int32x2_t test_vneg_s32(int32x2_t a) { return vneg_s32(a); } -// CHECK-LABEL: define <2 x float> @test_vneg_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vneg_f32( // CHECK: [[SUB_I:%.*]] = fsub <2 x float> , %a // CHECK: ret <2 x float> [[SUB_I]] float32x2_t test_vneg_f32(float32x2_t a) { return vneg_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vnegq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vnegq_s8( // CHECK: [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a // CHECK: ret <16 x i8> [[SUB_I]] int8x16_t test_vnegq_s8(int8x16_t a) { return vnegq_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vnegq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vnegq_s16( // CHECK: [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a // CHECK: ret <8 x i16> [[SUB_I]] int16x8_t test_vnegq_s16(int16x8_t a) { return vnegq_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vnegq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vnegq_s32( // CHECK: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vnegq_s32(int32x4_t a) { return vnegq_s32(a); } -// CHECK-LABEL: define <4 x float> @test_vnegq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vnegq_f32( // CHECK: [[SUB_I:%.*]] = fsub <4 x float> , %a // CHECK: ret <4 x float> [[SUB_I]] float32x4_t test_vnegq_f32(float32x4_t a) { return vnegq_f32(a); } - -// CHECK-LABEL: define <8 x i8> @test_vorn_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vorn_s8( // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[OR_I]] @@ -10862,7 +9208,7 @@ int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { return vorn_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vorn_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vorn_s16( // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[OR_I]] @@ -10870,7 +9216,7 @@ int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { return vorn_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vorn_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vorn_s32( // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[OR_I]] @@ -10878,7 +9224,7 @@ int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { return vorn_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vorn_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vorn_s64( // CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[OR_I]] @@ -10886,7 +9232,7 @@ int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { return vorn_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vorn_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vorn_u8( // CHECK: [[NEG_I:%.*]] = xor <8 x i8> %b, // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]] // CHECK: ret <8 x i8> [[OR_I]] @@ -10894,7 +9240,7 @@ uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { return vorn_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vorn_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vorn_u16( // CHECK: [[NEG_I:%.*]] = xor <4 x i16> %b, // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]] // CHECK: ret <4 x i16> [[OR_I]] @@ -10902,7 +9248,7 @@ uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { return vorn_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vorn_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vorn_u32( // CHECK: [[NEG_I:%.*]] = xor <2 x i32> %b, // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]] // CHECK: ret <2 x i32> [[OR_I]] @@ -10910,7 +9256,7 @@ uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { return vorn_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vorn_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vorn_u64( // CHECK: [[NEG_I:%.*]] = xor <1 x i64> %b, // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]] // CHECK: ret <1 x i64> [[OR_I]] @@ -10918,7 +9264,7 @@ uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { return vorn_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vornq_s8( // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[OR_I]] @@ -10926,7 +9272,7 @@ int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { return vornq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vornq_s16( // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[OR_I]] @@ -10934,7 +9280,7 @@ int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { return vornq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vornq_s32( // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[OR_I]] @@ -10942,7 +9288,7 @@ int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { return vornq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vornq_s64( // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[OR_I]] @@ -10950,7 +9296,7 @@ int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { return vornq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vornq_u8( // CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]] // CHECK: ret <16 x i8> [[OR_I]] @@ -10958,7 +9304,7 @@ uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { return vornq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vornq_u16( // CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]] // CHECK: ret <8 x i16> [[OR_I]] @@ -10966,7 +9312,7 @@ uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { return vornq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vornq_u32( // CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]] // CHECK: ret <4 x i32> [[OR_I]] @@ -10974,7 +9320,7 @@ uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { return vornq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vornq_u64( // CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]] // CHECK: ret <2 x i64> [[OR_I]] @@ -10982,891 +9328,751 @@ uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) { return vornq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vorr_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vorr_s8( // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b // CHECK: ret <8 x i8> [[OR_I]] int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) { return vorr_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vorr_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vorr_s16( // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b // CHECK: ret <4 x i16> [[OR_I]] int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) { return vorr_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vorr_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vorr_s32( // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b // CHECK: ret <2 x i32> [[OR_I]] int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) { return vorr_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vorr_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vorr_s64( // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b // CHECK: ret <1 x i64> [[OR_I]] int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) { return vorr_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vorr_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vorr_u8( // CHECK: [[OR_I:%.*]] = or <8 x i8> %a, %b // CHECK: ret <8 x i8> [[OR_I]] uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) { return vorr_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vorr_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vorr_u16( // CHECK: [[OR_I:%.*]] = or <4 x i16> %a, %b // CHECK: ret <4 x i16> [[OR_I]] uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) { return vorr_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vorr_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vorr_u32( // CHECK: [[OR_I:%.*]] = or <2 x i32> %a, %b // CHECK: ret <2 x i32> [[OR_I]] uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) { return vorr_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vorr_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vorr_u64( // CHECK: [[OR_I:%.*]] = or <1 x i64> %a, %b // CHECK: ret <1 x i64> [[OR_I]] uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) { return vorr_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vorrq_s8( // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b // CHECK: ret <16 x i8> [[OR_I]] int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) { return vorrq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vorrq_s16( // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b // CHECK: ret <8 x i16> [[OR_I]] int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) { return vorrq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vorrq_s32( // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b // CHECK: ret <4 x i32> [[OR_I]] int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) { return vorrq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vorrq_s64( // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b // CHECK: ret <2 x i64> [[OR_I]] int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) { return vorrq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vorrq_u8( // CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b // CHECK: ret <16 x i8> [[OR_I]] uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) { return vorrq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vorrq_u16( // CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b // CHECK: ret <8 x i16> [[OR_I]] uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) { return vorrq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vorrq_u32( // CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b // CHECK: ret <4 x i32> [[OR_I]] uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) { return vorrq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vorrq_u64( // CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b // CHECK: ret <2 x i64> [[OR_I]] uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) { return vorrq_u64(a, b); } - -// CHECK-LABEL: define <4 x i16> @test_vpadal_s8(<4 x i16> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadal_s8( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> [[VPADAL_V_I]], <8 x i8> %b) #4 +// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4 // CHECK: ret <4 x i16> [[VPADAL_V1_I]] int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) { return vpadal_s8(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpadal_s16(<2 x i32> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadal_s16( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> [[VPADAL_V_I]], <4 x i16> [[VPADAL_V1_I]]) #4 +// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4 // CHECK: ret <2 x i32> [[VPADAL_V2_I]] int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) { return vpadal_s16(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vpadal_s32(<1 x i64> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadal_s32( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> [[VPADAL_V_I]], <2 x i32> [[VPADAL_V1_I]]) #4 +// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4 // CHECK: ret <1 x i64> [[VPADAL_V2_I]] int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) { return vpadal_s32(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vpadal_u8(<4 x i16> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadal_u8( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> [[VPADAL_V_I]], <8 x i8> %b) #4 +// CHECK: [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b) #4 // CHECK: ret <4 x i16> [[VPADAL_V1_I]] uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) { return vpadal_u8(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpadal_u16(<2 x i32> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadal_u16( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> [[VPADAL_V_I]], <4 x i16> [[VPADAL_V1_I]]) #4 +// CHECK: [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b) #4 // CHECK: ret <2 x i32> [[VPADAL_V2_I]] uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) { return vpadal_u16(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vpadal_u32(<1 x i64> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadal_u32( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADAL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VPADAL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> [[VPADAL_V_I]], <2 x i32> [[VPADAL_V1_I]]) #4 +// CHECK: [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b) #4 // CHECK: ret <1 x i64> [[VPADAL_V2_I]] uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) { return vpadal_u32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vpadalq_s8(<8 x i16> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadalq_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> [[VPADALQ_V_I]], <16 x i8> %b) #4 +// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4 // CHECK: ret <8 x i16> [[VPADALQ_V1_I]] int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) { return vpadalq_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vpadalq_s16(<4 x i32> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadalq_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> [[VPADALQ_V_I]], <8 x i16> [[VPADALQ_V1_I]]) #4 +// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4 // CHECK: ret <4 x i32> [[VPADALQ_V2_I]] int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) { return vpadalq_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vpadalq_s32(<2 x i64> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadalq_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> [[VPADALQ_V_I]], <4 x i32> [[VPADALQ_V1_I]]) #4 +// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4 // CHECK: ret <2 x i64> [[VPADALQ_V2_I]] int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) { return vpadalq_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vpadalq_u8(<8 x i16> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadalq_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> [[VPADALQ_V_I]], <16 x i8> %b) #4 +// CHECK: [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b) #4 // CHECK: ret <8 x i16> [[VPADALQ_V1_I]] uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) { return vpadalq_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vpadalq_u16(<4 x i32> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadalq_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> [[VPADALQ_V_I]], <8 x i16> [[VPADALQ_V1_I]]) #4 +// CHECK: [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b) #4 // CHECK: ret <4 x i32> [[VPADALQ_V2_I]] uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) { return vpadalq_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vpadalq_u32(<2 x i64> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadalq_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VPADALQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VPADALQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> [[VPADALQ_V_I]], <4 x i32> [[VPADALQ_V1_I]]) #4 +// CHECK: [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b) #4 // CHECK: ret <2 x i64> [[VPADALQ_V2_I]] uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) { return vpadalq_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vpadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadd_s8( // CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VPADD_V_I]] int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { return vpadd_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vpadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VPADD_V2_I]] int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { return vpadd_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VPADD_V2_I]] int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { return vpadd_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vpadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpadd_u8( // CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VPADD_V_I]] uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { return vpadd_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vpadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> [[VPADD_V_I]], <4 x i16> [[VPADD_V1_I]]) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VPADD_V2_I]] uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { return vpadd_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> [[VPADD_V_I]], <2 x i32> [[VPADD_V1_I]]) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VPADD_V2_I]] uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { return vpadd_u32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vpadd_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vpadd_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VPADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> [[VPADD_V_I]], <2 x float> [[VPADD_V1_I]]) #4 +// CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPADD_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: ret <2 x float> [[VPADD_V2_I]] float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { return vpadd_f32(a, b); } - -// CHECK-LABEL: define <4 x i16> @test_vpaddl_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vpaddl_s8( // CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a) #4 // CHECK: ret <4 x i16> [[VPADDL_I]] int16x4_t test_vpaddl_s8(int8x8_t a) { return vpaddl_s8(a); } -// CHECK-LABEL: define <2 x i32> @test_vpaddl_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vpaddl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a) #4 // CHECK: ret <2 x i32> [[VPADDL1_I]] int32x2_t test_vpaddl_s16(int16x4_t a) { return vpaddl_s16(a); } -// CHECK-LABEL: define <1 x i64> @test_vpaddl_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vpaddl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a) #4 // CHECK: ret <1 x i64> [[VPADDL1_I]] int64x1_t test_vpaddl_s32(int32x2_t a) { return vpaddl_s32(a); } -// CHECK-LABEL: define <4 x i16> @test_vpaddl_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vpaddl_u8( // CHECK: [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a) #4 // CHECK: ret <4 x i16> [[VPADDL_I]] uint16x4_t test_vpaddl_u8(uint8x8_t a) { return vpaddl_u8(a); } -// CHECK-LABEL: define <2 x i32> @test_vpaddl_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vpaddl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> [[VPADDL_I]]) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a) #4 // CHECK: ret <2 x i32> [[VPADDL1_I]] uint32x2_t test_vpaddl_u16(uint16x4_t a) { return vpaddl_u16(a); } -// CHECK-LABEL: define <1 x i64> @test_vpaddl_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vpaddl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> [[VPADDL_I]]) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a) #4 // CHECK: ret <1 x i64> [[VPADDL1_I]] uint64x1_t test_vpaddl_u32(uint32x2_t a) { return vpaddl_u32(a); } -// CHECK-LABEL: define <8 x i16> @test_vpaddlq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_s8( // CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a) #4 // CHECK: ret <8 x i16> [[VPADDL_I]] int16x8_t test_vpaddlq_s8(int8x16_t a) { return vpaddlq_s8(a); } -// CHECK-LABEL: define <4 x i32> @test_vpaddlq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a) #4 // CHECK: ret <4 x i32> [[VPADDL1_I]] int32x4_t test_vpaddlq_s16(int16x8_t a) { return vpaddlq_s16(a); } -// CHECK-LABEL: define <2 x i64> @test_vpaddlq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a) #4 // CHECK: ret <2 x i64> [[VPADDL1_I]] int64x2_t test_vpaddlq_s32(int32x4_t a) { return vpaddlq_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vpaddlq_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_u8( // CHECK: [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a) #4 // CHECK: ret <8 x i16> [[VPADDL_I]] uint16x8_t test_vpaddlq_u8(uint8x16_t a) { return vpaddlq_u8(a); } -// CHECK-LABEL: define <4 x i32> @test_vpaddlq_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> [[VPADDL_I]]) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a) #4 // CHECK: ret <4 x i32> [[VPADDL1_I]] uint32x4_t test_vpaddlq_u16(uint16x8_t a) { return vpaddlq_u16(a); } -// CHECK-LABEL: define <2 x i64> @test_vpaddlq_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vpaddlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VPADDL_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> [[VPADDL_I]]) #4 +// CHECK: [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a) #4 // CHECK: ret <2 x i64> [[VPADDL1_I]] uint64x2_t test_vpaddlq_u32(uint32x4_t a) { return vpaddlq_u32(a); } - -// CHECK-LABEL: define <8 x i8> @test_vpmax_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpmax_s8( // CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VPMAX_V_I]] int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { return vpmax_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vpmax_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpmax_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> [[VPMAX_V_I]], <4 x i16> [[VPMAX_V1_I]]) #4 +// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VPMAX_V2_I]] int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { return vpmax_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpmax_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpmax_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> [[VPMAX_V_I]], <2 x i32> [[VPMAX_V1_I]]) #4 +// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VPMAX_V2_I]] int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { return vpmax_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vpmax_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpmax_u8( // CHECK: [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VPMAX_V_I]] uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { return vpmax_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vpmax_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpmax_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> [[VPMAX_V_I]], <4 x i16> [[VPMAX_V1_I]]) #4 +// CHECK: [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VPMAX_V2_I]] uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { return vpmax_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpmax_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpmax_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> [[VPMAX_V_I]], <2 x i32> [[VPMAX_V1_I]]) #4 +// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VPMAX_V2_I]] uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { return vpmax_u32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vpmax_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vpmax_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPMAX_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VPMAX_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> [[VPMAX_V_I]], <2 x float> [[VPMAX_V1_I]]) #4 +// CHECK: [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMAX_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: ret <2 x float> [[VPMAX_V2_I]] float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { return vpmax_f32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vpmin_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpmin_s8( // CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VPMIN_V_I]] int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { return vpmin_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vpmin_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpmin_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> [[VPMIN_V_I]], <4 x i16> [[VPMIN_V1_I]]) #4 +// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VPMIN_V2_I]] int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { return vpmin_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpmin_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpmin_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> [[VPMIN_V_I]], <2 x i32> [[VPMIN_V1_I]]) #4 +// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VPMIN_V2_I]] int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { return vpmin_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vpmin_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vpmin_u8( // CHECK: [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VPMIN_V_I]] uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { return vpmin_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vpmin_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vpmin_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> [[VPMIN_V_I]], <4 x i16> [[VPMIN_V1_I]]) #4 +// CHECK: [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VPMIN_V2_I]] uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { return vpmin_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vpmin_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vpmin_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> [[VPMIN_V_I]], <2 x i32> [[VPMIN_V1_I]]) #4 +// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VPMIN_V2_I]] uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { return vpmin_u32(a, b); } -// CHECK-LABEL: define <2 x float> @test_vpmin_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vpmin_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VPMIN_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VPMIN_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> [[VPMIN_V_I]], <2 x float> [[VPMIN_V1_I]]) #4 +// CHECK: [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VPMIN_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: ret <2 x float> [[VPMIN_V2_I]] float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { return vpmin_f32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vqabs_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vqabs_s8( // CHECK: [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a) #4 // CHECK: ret <8 x i8> [[VQABS_V_I]] int8x8_t test_vqabs_s8(int8x8_t a) { return vqabs_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vqabs_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vqabs_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> [[VQABS_V_I]]) #4 +// CHECK: [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a) #4 // CHECK: [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VQABS_V1_I]] int16x4_t test_vqabs_s16(int16x4_t a) { return vqabs_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vqabs_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vqabs_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VQABS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> [[VQABS_V_I]]) #4 +// CHECK: [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a) #4 // CHECK: [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQABS_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VQABS_V1_I]] int32x2_t test_vqabs_s32(int32x2_t a) { return vqabs_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vqabsq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vqabsq_s8( // CHECK: [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a) #4 // CHECK: ret <16 x i8> [[VQABSQ_V_I]] int8x16_t test_vqabsq_s8(int8x16_t a) { return vqabsq_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vqabsq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqabsq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> [[VQABSQ_V_I]]) #4 +// CHECK: [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a) #4 // CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP1]] +// CHECK: ret <8 x i16> [[VQABSQ_V1_I]] int16x8_t test_vqabsq_s16(int16x8_t a) { return vqabsq_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vqabsq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqabsq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQABSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> [[VQABSQ_V_I]]) #4 +// CHECK: [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a) #4 // CHECK: [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQABSQ_V2_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP1]] +// CHECK: ret <4 x i32> [[VQABSQ_V1_I]] int32x4_t test_vqabsq_s32(int32x4_t a) { return vqabsq_s32(a); } - -// CHECK-LABEL: define <8 x i8> @test_vqadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vqadd_s8( // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VQADD_V_I]] int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { return vqadd_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vqadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQADD_V2_I]] int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { return vqadd_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQADD_V2_I]] int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { return vqadd_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vqadd_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vqadd_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VQADD_V2_I]] int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { return vqadd_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vqadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vqadd_u8( // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VQADD_V_I]] uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { return vqadd_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vqadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> [[VQADD_V_I]], <4 x i16> [[VQADD_V1_I]]) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQADD_V2_I]] uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { return vqadd_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> [[VQADD_V_I]], <2 x i32> [[VQADD_V1_I]]) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQADD_V2_I]] uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { return vqadd_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vqadd_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vqadd_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> [[VQADD_V_I]], <1 x i64> [[VQADD_V1_I]]) #4 +// CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQADD_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VQADD_V2_I]] uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { return vqadd_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vqaddq_s8( // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VQADDQ_V_I]] int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { return vqaddq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQADDQ_V2_I]] int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { return vqaddq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQADDQ_V2_I]] int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { return vqaddq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqaddq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqaddq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQADDQ_V2_I]] int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { return vqaddq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vqaddq_u8( // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VQADDQ_V_I]] uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { return vqaddq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> [[VQADDQ_V_I]], <8 x i16> [[VQADDQ_V1_I]]) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQADDQ_V2_I]] uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { return vqaddq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> [[VQADDQ_V_I]], <4 x i32> [[VQADDQ_V1_I]]) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQADDQ_V2_I]] uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { return vqaddq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqaddq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqaddq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> [[VQADDQ_V_I]], <2 x i64> [[VQADDQ_V1_I]]) #4 +// CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQADDQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQADDQ_V2_I]] uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { return vqaddq_u64(a, b); } - -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vqdmlal_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlal_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vqdmlal_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlal_s32(a, b, c); } - -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vqdmlal_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlal_lane_s16(a, b, c, 3); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vqdmlal_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlal_lane_s32(a, b, c, 1); } - -// CHECK-LABEL: define <4 x i32> @test_vqdmlal_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 { +// CHECK-LABEL: @test_vqdmlal_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 @@ -11874,94 +10080,73 @@ int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> [[VQDMLAL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4 +// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 +// CHECK: [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) #4 // CHECK: ret <4 x i32> [[VQDMLAL_V6_I]] int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vqdmlal_n_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlal_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vqdmlal_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4 -// CHECK: [[VQDMLAL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> [[VQDMLAL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4 +// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) #4 // CHECK: ret <2 x i64> [[VQDMLAL_V4_I]] int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vqdmlal_n_s32(a, b, c); } - -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) #4 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlsl_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) #4 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlsl_s32(a, b, c); } - -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %c, <4 x i16> %c, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[SHUFFLE]]) #4 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) #4 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) { return vqdmlsl_lane_s16(a, b, c, 3); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %c, <2 x i32> %c, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL1_I]]) #4 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL2_I]]) #4 +// CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[SHUFFLE]]) #4 +// CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) #4 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { return vqdmlsl_lane_s32(a, b, c, 1); } - -// CHECK-LABEL: define <4 x i32> @test_vqdmlsl_n_s16(<4 x i32> %a, <4 x i16> %b, i16 signext %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0 @@ -11969,176 +10154,137 @@ int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) { // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMLAL4_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMLAL_I]], <4 x i16> [[VQDMLAL4_I]]) #4 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQDMLSL_V_I]], <4 x i32> [[VQDMLAL5_I]]) #4 +// CHECK: [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]]) #4 +// CHECK: [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]]) #4 // CHECK: ret <4 x i32> [[VQDMLSL_V6_I]] int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { return vqdmlsl_n_s16(a, b, c); } -// CHECK-LABEL: define <2 x i64> @test_vqdmlsl_n_s32(<2 x i64> %a, <2 x i32> %b, i32 %c) #0 { +// CHECK-LABEL: @test_vqdmlsl_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMLAL_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMLAL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMLAL_I]], <2 x i32> [[VQDMLAL2_I]]) #4 -// CHECK: [[VQDMLSL_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQDMLSL_V_I]], <2 x i64> [[VQDMLAL3_I]]) #4 +// CHECK: [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]]) #4 +// CHECK: [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]]) #4 // CHECK: ret <2 x i64> [[VQDMLSL_V4_I]] int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { return vqdmlsl_n_s32(a, b, c); } - -// CHECK-LABEL: define <4 x i16> @test_vqdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqdmulh_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { return vqdmulh_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqdmulh_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { return vqdmulh_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqdmulhq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { return vqdmulhq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqdmulhq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { return vqdmulhq_s32(a, b); } - -// CHECK-LABEL: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqdmulh_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V1_I]]) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #4 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQDMULH_V2_I]] int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) { return vqdmulh_lane_s16(a, b, 3); } -// CHECK-LABEL: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqdmulh_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V1_I]]) #4 +// CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #4 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQDMULH_V2_I]] int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) { return vqdmulh_lane_s32(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqdmulhq_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V1_I]]) #4 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #4 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) { return vqdmulhq_lane_s16(a, b, 3); } -// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqdmulhq_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V1_I]]) #4 +// CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #4 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) { return vqdmulhq_lane_s32(a, b, 1); } - -// CHECK-LABEL: define <4 x i16> @test_vqdmulh_n_s16(<4 x i16> %a, i16 signext %b) #0 { +// CHECK-LABEL: @test_vqdmulh_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[VQDMULH_V_I]], <4 x i16> [[VQDMULH_V4_I]]) #4 +// CHECK: [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #4 // CHECK: [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V6_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQDMULH_V5_I]] int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { return vqdmulh_n_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqdmulh_n_s32(<2 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vqdmulh_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[VQDMULH_V_I]], <2 x i32> [[VQDMULH_V2_I]]) #4 +// CHECK: [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #4 // CHECK: [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQDMULH_V4_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQDMULH_V3_I]] int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { return vqdmulh_n_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqdmulhq_n_s16(<8 x i16> %a, i16 signext %b) #0 { +// CHECK-LABEL: @test_vqdmulhq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 @@ -12149,420 +10295,334 @@ int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> -// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[VQDMULHQ_V_I]], <8 x i16> [[VQDMULHQ_V8_I]]) #4 +// CHECK: [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) #4 // CHECK: [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V10_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQDMULHQ_V9_I]] int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { return vqdmulhq_n_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vqdmulhq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> -// CHECK: [[VQDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[VQDMULHQ_V_I]], <4 x i32> [[VQDMULHQ_V4_I]]) #4 +// CHECK: [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) #4 // CHECK: [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULHQ_V6_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULHQ_V5_I]] int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { return vqdmulhq_n_s32(a, b); } - -// CHECK-LABEL: define <4 x i32> @test_vqdmull_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqdmull_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { return vqdmull_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqdmull_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { return vqdmull_s32(a, b); } - -// CHECK-LABEL: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqdmull_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V1_I]]) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #4 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V2_I]] int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) { return vqdmull_lane_s16(a, b, 3); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqdmull_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V1_I]]) #4 +// CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #4 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V2_I]] int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) { return vqdmull_lane_s32(a, b, 1); } - -// CHECK-LABEL: define <4 x i32> @test_vqdmull_n_s16(<4 x i16> %a, i16 signext %b) #0 { +// CHECK-LABEL: @test_vqdmull_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[VQDMULL_V_I]], <4 x i16> [[VQDMULL_V4_I]]) #4 +// CHECK: [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #4 // CHECK: [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V6_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQDMULL_V5_I]] int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { return vqdmull_n_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqdmull_n_s32(<2 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vqdmull_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQDMULL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQDMULL_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[VQDMULL_V_I]], <2 x i32> [[VQDMULL_V2_I]]) #4 +// CHECK: [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #4 // CHECK: [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQDMULL_V4_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQDMULL_V3_I]] int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { return vqdmull_n_s32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vqmovn_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqmovn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> [[VQMOVN_V_I]]) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a) #4 // CHECK: ret <8 x i8> [[VQMOVN_V1_I]] int8x8_t test_vqmovn_s16(int16x8_t a) { return vqmovn_s16(a); } -// CHECK-LABEL: define <4 x i16> @test_vqmovn_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqmovn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> [[VQMOVN_V_I]]) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a) #4 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VQMOVN_V1_I]] int16x4_t test_vqmovn_s32(int32x4_t a) { return vqmovn_s32(a); } -// CHECK-LABEL: define <2 x i32> @test_vqmovn_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqmovn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> [[VQMOVN_V_I]]) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a) #4 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VQMOVN_V1_I]] int32x2_t test_vqmovn_s64(int64x2_t a) { return vqmovn_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vqmovn_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqmovn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> [[VQMOVN_V_I]]) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a) #4 // CHECK: ret <8 x i8> [[VQMOVN_V1_I]] uint8x8_t test_vqmovn_u16(uint16x8_t a) { return vqmovn_u16(a); } -// CHECK-LABEL: define <4 x i16> @test_vqmovn_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqmovn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> [[VQMOVN_V_I]]) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a) #4 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VQMOVN_V1_I]] uint16x4_t test_vqmovn_u32(uint32x4_t a) { return vqmovn_u32(a); } -// CHECK-LABEL: define <2 x i32> @test_vqmovn_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqmovn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> [[VQMOVN_V_I]]) #4 +// CHECK: [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a) #4 // CHECK: [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVN_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VQMOVN_V1_I]] uint32x2_t test_vqmovn_u64(uint64x2_t a) { return vqmovn_u64(a); } - -// CHECK-LABEL: define <8 x i8> @test_vqmovun_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqmovun_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> [[VQMOVUN_V_I]]) #4 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a) #4 // CHECK: ret <8 x i8> [[VQMOVUN_V1_I]] uint8x8_t test_vqmovun_s16(int16x8_t a) { return vqmovun_s16(a); } -// CHECK-LABEL: define <4 x i16> @test_vqmovun_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqmovun_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> [[VQMOVUN_V_I]]) #4 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a) #4 // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VQMOVUN_V1_I]] uint16x4_t test_vqmovun_s32(int32x4_t a) { return vqmovun_s32(a); } -// CHECK-LABEL: define <2 x i32> @test_vqmovun_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqmovun_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[VQMOVUN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> [[VQMOVUN_V_I]]) #4 +// CHECK: [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a) #4 // CHECK: [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQMOVUN_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VQMOVUN_V1_I]] uint32x2_t test_vqmovun_s64(int64x2_t a) { return vqmovun_s64(a); } - -// CHECK-LABEL: define <8 x i8> @test_vqneg_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vqneg_s8( // CHECK: [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a) #4 // CHECK: ret <8 x i8> [[VQNEG_V_I]] int8x8_t test_vqneg_s8(int8x8_t a) { return vqneg_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vqneg_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vqneg_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> [[VQNEG_V_I]]) #4 +// CHECK: [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a) #4 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP1]] +// CHECK: ret <4 x i16> [[VQNEG_V1_I]] int16x4_t test_vqneg_s16(int16x4_t a) { return vqneg_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vqneg_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vqneg_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VQNEG_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> [[VQNEG_V_I]]) #4 +// CHECK: [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a) #4 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[VQNEG_V2_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP1]] +// CHECK: ret <2 x i32> [[VQNEG_V1_I]] int32x2_t test_vqneg_s32(int32x2_t a) { return vqneg_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vqnegq_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vqnegq_s8( // CHECK: [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a) #4 // CHECK: ret <16 x i8> [[VQNEGQ_V_I]] int8x16_t test_vqnegq_s8(int8x16_t a) { return vqnegq_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vqnegq_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqnegq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> [[VQNEGQ_V_I]]) #4 +// CHECK: [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a) #4 // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP1]] +// CHECK: ret <8 x i16> [[VQNEGQ_V1_I]] int16x8_t test_vqnegq_s16(int16x8_t a) { return vqnegq_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vqnegq_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqnegq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VQNEGQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> [[VQNEGQ_V_I]]) #4 +// CHECK: [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a) #4 // CHECK: [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[VQNEGQ_V2_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP1]] +// CHECK: ret <4 x i32> [[VQNEGQ_V1_I]] int32x4_t test_vqnegq_s32(int32x4_t a) { return vqnegq_s32(a); } - -// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrdmulh_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { return vqrdmulh_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqrdmulh_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { return vqrdmulh_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrdmulhq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { return vqrdmulhq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqrdmulhq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { return vqrdmulhq_s32(a, b); } - -// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrdmulh_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V1_I]]) #4 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[SHUFFLE]]) #4 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) { return vqrdmulh_lane_s16(a, b, 3); } -// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqrdmulh_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <2 x i32> // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE]] to <8 x i8> -// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQRDMULH_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V1_I]]) #4 +// CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[SHUFFLE]]) #4 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) { return vqrdmulh_lane_s32(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrdmulhq_lane_s16( // CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x i16> %b, <4 x i16> %b, <8 x i32> // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V1_I]]) #4 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[SHUFFLE]]) #4 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) { return vqrdmulhq_lane_s16(a, b, 3); } -// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqrdmulhq_lane_s32( // CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i32> %b, <2 x i32> %b, <4 x i32> // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V1_I]]) #4 +// CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[SHUFFLE]]) #4 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) { return vqrdmulhq_lane_s32(a, b, 1); } - -// CHECK-LABEL: define <4 x i16> @test_vqrdmulh_n_s16(<4 x i16> %a, i16 signext %b) #0 { +// CHECK-LABEL: @test_vqrdmulh_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8> -// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[VQRDMULH_V_I]], <4 x i16> [[VQRDMULH_V4_I]]) #4 +// CHECK: [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]]) #4 // CHECK: [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V6_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQRDMULH_V5_I]] int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { return vqrdmulh_n_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqrdmulh_n_s32(<2 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vqrdmulh_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[VQRDMULH_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQRDMULH_V2_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[VQRDMULH_V_I]], <2 x i32> [[VQRDMULH_V2_I]]) #4 +// CHECK: [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]]) #4 // CHECK: [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRDMULH_V4_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQRDMULH_V3_I]] int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { return vqrdmulh_n_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqrdmulhq_n_s16(<8 x i16> %a, i16 signext %b) #0 { +// CHECK-LABEL: @test_vqrdmulhq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1 @@ -12573,220 +10633,176 @@ int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { // CHECK: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6 // CHECK: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRDMULHQ_V8_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[VQRDMULHQ_V_I]], <8 x i16> [[VQRDMULHQ_V8_I]]) #4 +// CHECK: [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]]) #4 // CHECK: [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V10_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQRDMULHQ_V9_I]] int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { return vqrdmulhq_n_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqrdmulhq_n_s32(<4 x i32> %a, i32 %b) #0 { +// CHECK-LABEL: @test_vqrdmulhq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8> -// CHECK: [[VQRDMULHQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V4_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[VQRDMULHQ_V_I]], <4 x i32> [[VQRDMULHQ_V4_I]]) #4 +// CHECK: [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]]) #4 // CHECK: [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRDMULHQ_V6_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQRDMULHQ_V5_I]] int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) { return vqrdmulhq_n_s32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vqrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vqrshl_s8( // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VQRSHL_V_I]] int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { return vqrshl_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vqrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQRSHL_V2_I]] int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { return vqrshl_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqrshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQRSHL_V2_I]] int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { return vqrshl_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vqrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vqrshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VQRSHL_V2_I]] int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { return vqrshl_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vqrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vqrshl_u8( // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VQRSHL_V_I]] uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { return vqrshl_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vqrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> [[VQRSHL_V_I]], <4 x i16> [[VQRSHL_V1_I]]) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQRSHL_V2_I]] uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { return vqrshl_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqrshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> [[VQRSHL_V_I]], <2 x i32> [[VQRSHL_V1_I]]) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQRSHL_V2_I]] uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { return vqrshl_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vqrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vqrshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> [[VQRSHL_V_I]], <1 x i64> [[VQRSHL_V1_I]]) #4 +// CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQRSHL_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VQRSHL_V2_I]] uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { return vqrshl_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vqrshlq_s8( // CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { return vqrshlq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]] int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { return vqrshlq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqrshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]] int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { return vqrshlq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqrshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]] int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { return vqrshlq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vqrshlq_u8( // CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { return vqrshlq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqrshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> [[VQRSHLQ_V_I]], <8 x i16> [[VQRSHLQ_V1_I]]) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]] uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) { return vqrshlq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqrshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> [[VQRSHLQ_V_I]], <4 x i32> [[VQRSHLQ_V1_I]]) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]] uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { return vqrshlq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqrshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> [[VQRSHLQ_V_I]], <2 x i64> [[VQRSHLQ_V1_I]]) #4 +// CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQRSHLQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]] uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { return vqrshlq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> ) @@ -12795,7 +10811,7 @@ int8x8_t test_vqrshrn_n_s16(int16x8_t a) { return vqrshrn_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> ) @@ -12804,7 +10820,7 @@ int16x4_t test_vqrshrn_n_s32(int32x4_t a) { return vqrshrn_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> ) @@ -12813,7 +10829,7 @@ int32x2_t test_vqrshrn_n_s64(int64x2_t a) { return vqrshrn_n_s64(a, 1); } -// CHECK-LABEL: define <8 x i8> @test_vqrshrn_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> ) @@ -12822,7 +10838,7 @@ uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { return vqrshrn_n_u16(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vqrshrn_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> ) @@ -12831,7 +10847,7 @@ uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { return vqrshrn_n_u32(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vqrshrn_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqrshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> ) @@ -12840,8 +10856,7 @@ uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { return vqrshrn_n_u64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vqrshrun_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqrshrun_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> ) @@ -12850,7 +10865,7 @@ uint8x8_t test_vqrshrun_n_s16(int16x8_t a) { return vqrshrun_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vqrshrun_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqrshrun_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> ) @@ -12859,7 +10874,7 @@ uint16x4_t test_vqrshrun_n_s32(int32x4_t a) { return vqrshrun_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vqrshrun_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqrshrun_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> ) @@ -12868,200 +10883,162 @@ uint32x2_t test_vqrshrun_n_s64(int64x2_t a) { return vqrshrun_n_s64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vqshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vqshl_s8( // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VQSHL_V_I]] int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { return vqshl_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vqshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQSHL_V2_I]] int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { return vqshl_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQSHL_V2_I]] int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { return vqshl_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vqshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vqshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VQSHL_V2_I]] int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { return vqshl_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vqshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vqshl_u8( // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VQSHL_V_I]] uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { return vqshl_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vqshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_V_I]], <4 x i16> [[VQSHL_V1_I]]) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQSHL_V2_I]] uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { return vqshl_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_V_I]], <2 x i32> [[VQSHL_V1_I]]) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQSHL_V2_I]] uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { return vqshl_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vqshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vqshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_V_I]], <1 x i64> [[VQSHL_V1_I]]) #4 +// CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSHL_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VQSHL_V2_I]] uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { return vqshl_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vqshlq_s8( // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VQSHLQ_V_I]] int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { return vqshlq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]] int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { return vqshlq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]] int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { return vqshlq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]] int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { return vqshlq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vqshlq_u8( // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VQSHLQ_V_I]] uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { return vqshlq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHLQ_V_I]], <8 x i16> [[VQSHLQ_V1_I]]) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQSHLQ_V2_I]] uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { return vqshlq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHLQ_V_I]], <4 x i32> [[VQSHLQ_V1_I]]) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQSHLQ_V2_I]] uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { return vqshlq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHLQ_V_I]], <2 x i64> [[VQSHLQ_V1_I]]) #4 +// CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSHLQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQSHLQ_V2_I]] uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { return vqshlq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vqshlu_n_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vqshlu_n_s8( // CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> ) // CHECK: ret <8 x i8> [[VQSHLU_N]] uint8x8_t test_vqshlu_n_s8(int8x8_t a) { return vqshlu_n_s8(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vqshlu_n_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshlu_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> ) @@ -13070,7 +11047,7 @@ uint16x4_t test_vqshlu_n_s16(int16x4_t a) { return vqshlu_n_s16(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vqshlu_n_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshlu_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> ) @@ -13079,7 +11056,7 @@ uint32x2_t test_vqshlu_n_s32(int32x2_t a) { return vqshlu_n_s32(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vqshlu_n_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshlu_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> ) @@ -13088,14 +11065,14 @@ uint64x1_t test_vqshlu_n_s64(int64x1_t a) { return vqshlu_n_s64(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vqshluq_n_s8( // CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> ) // CHECK: ret <16 x i8> [[VQSHLU_N]] uint8x16_t test_vqshluq_n_s8(int8x16_t a) { return vqshluq_n_s8(a, 1); } -// CHECK-LABEL: define <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshluq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> ) @@ -13104,7 +11081,7 @@ uint16x8_t test_vqshluq_n_s16(int16x8_t a) { return vqshluq_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshluq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> ) @@ -13113,7 +11090,7 @@ uint32x4_t test_vqshluq_n_s32(int32x4_t a) { return vqshluq_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vqshluq_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshluq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> ) @@ -13122,15 +11099,14 @@ uint64x2_t test_vqshluq_n_s64(int64x2_t a) { return vqshluq_n_s64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vqshl_n_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_s8( // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> ) // CHECK: ret <8 x i8> [[VQSHL_N]] int8x8_t test_vqshl_n_s8(int8x8_t a) { return vqshl_n_s8(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vqshl_n_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> ) @@ -13139,7 +11115,7 @@ int16x4_t test_vqshl_n_s16(int16x4_t a) { return vqshl_n_s16(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vqshl_n_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> ) @@ -13148,7 +11124,7 @@ int32x2_t test_vqshl_n_s32(int32x2_t a) { return vqshl_n_s32(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vqshl_n_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) @@ -13157,14 +11133,14 @@ int64x1_t test_vqshl_n_s64(int64x1_t a) { return vqshl_n_s64(a, 1); } -// CHECK-LABEL: define <8 x i8> @test_vqshl_n_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_u8( // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> ) // CHECK: ret <8 x i8> [[VQSHL_N]] uint8x8_t test_vqshl_n_u8(uint8x8_t a) { return vqshl_n_u8(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vqshl_n_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> ) @@ -13173,7 +11149,7 @@ uint16x4_t test_vqshl_n_u16(uint16x4_t a) { return vqshl_n_u16(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vqshl_n_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> ) @@ -13182,7 +11158,7 @@ uint32x2_t test_vqshl_n_u32(uint32x2_t a) { return vqshl_n_u32(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vqshl_n_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> ) @@ -13191,14 +11167,14 @@ uint64x1_t test_vqshl_n_u64(uint64x1_t a) { return vqshl_n_u64(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_s8( // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> ) // CHECK: ret <16 x i8> [[VQSHL_N]] int8x16_t test_vqshlq_n_s8(int8x16_t a) { return vqshlq_n_s8(a, 1); } -// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> ) @@ -13207,7 +11183,7 @@ int16x8_t test_vqshlq_n_s16(int16x8_t a) { return vqshlq_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> ) @@ -13216,7 +11192,7 @@ int32x4_t test_vqshlq_n_s32(int32x4_t a) { return vqshlq_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> ) @@ -13225,14 +11201,14 @@ int64x2_t test_vqshlq_n_s64(int64x2_t a) { return vqshlq_n_s64(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_u8( // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> ) // CHECK: ret <16 x i8> [[VQSHL_N]] uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { return vqshlq_n_u8(a, 1); } -// CHECK-LABEL: define <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> ) @@ -13241,7 +11217,7 @@ uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { return vqshlq_n_u16(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> ) @@ -13250,7 +11226,7 @@ uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { return vqshlq_n_u32(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vqshlq_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshlq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> ) @@ -13259,8 +11235,7 @@ uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { return vqshlq_n_u64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> ) @@ -13269,7 +11244,7 @@ int8x8_t test_vqshrn_n_s16(int16x8_t a) { return vqshrn_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> ) @@ -13278,7 +11253,7 @@ int16x4_t test_vqshrn_n_s32(int32x4_t a) { return vqshrn_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> ) @@ -13287,7 +11262,7 @@ int32x2_t test_vqshrn_n_s64(int64x2_t a) { return vqshrn_n_s64(a, 1); } -// CHECK-LABEL: define <8 x i8> @test_vqshrn_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> ) @@ -13296,7 +11271,7 @@ uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { return vqshrn_n_u16(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vqshrn_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> ) @@ -13305,7 +11280,7 @@ uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { return vqshrn_n_u32(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vqshrn_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> ) @@ -13314,8 +11289,7 @@ uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { return vqshrn_n_u64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vqshrun_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vqshrun_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> ) @@ -13324,7 +11298,7 @@ uint8x8_t test_vqshrun_n_s16(int16x8_t a) { return vqshrun_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vqshrun_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vqshrun_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> ) @@ -13333,7 +11307,7 @@ uint16x4_t test_vqshrun_n_s32(int32x4_t a) { return vqshrun_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vqshrun_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vqshrun_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> ) @@ -13342,2722 +11316,2589 @@ uint32x2_t test_vqshrun_n_s64(int64x2_t a) { return vqshrun_n_s64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vqsub_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vqsub_s8( // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VQSUB_V_I]] int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { return vqsub_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vqsub_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqsub_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQSUB_V2_I]] int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { return vqsub_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqsub_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqsub_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQSUB_V2_I]] int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { return vqsub_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vqsub_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vqsub_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VQSUB_V2_I]] int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { return vqsub_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vqsub_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vqsub_u8( // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VQSUB_V_I]] uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { return vqsub_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vqsub_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vqsub_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> [[VQSUB_V_I]], <4 x i16> [[VQSUB_V1_I]]) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VQSUB_V2_I]] uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { return vqsub_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vqsub_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vqsub_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> [[VQSUB_V_I]], <2 x i32> [[VQSUB_V1_I]]) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VQSUB_V2_I]] uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { return vqsub_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vqsub_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vqsub_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VQSUB_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VQSUB_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> [[VQSUB_V_I]], <1 x i64> [[VQSUB_V1_I]]) #4 +// CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VQSUB_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VQSUB_V2_I]] uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { return vqsub_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vqsubq_s8( // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { return vqsubq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqsubq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { return vqsubq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqsubq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { return vqsubq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqsubq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { return vqsubq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vqsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vqsubq_u8( // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { return vqsubq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vqsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vqsubq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> [[VQSUBQ_V_I]], <8 x i16> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { return vqsubq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vqsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vqsubq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> [[VQSUBQ_V_I]], <4 x i32> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { return vqsubq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vqsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vqsubq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VQSUBQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VQSUBQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> [[VQSUBQ_V_I]], <2 x i64> [[VQSUBQ_V1_I]]) #4 +// CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VQSUBQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { return vqsubq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vraddhn_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vraddhn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: ret <8 x i8> [[VRADDHN_V2_I]] int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { return vraddhn_s16(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vraddhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vraddhn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VRADDHN_V2_I]] int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { return vraddhn_s32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vraddhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vraddhn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VRADDHN_V2_I]] int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { return vraddhn_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vraddhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vraddhn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> [[VRADDHN_V_I]], <8 x i16> [[VRADDHN_V1_I]]) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: ret <8 x i8> [[VRADDHN_V2_I]] uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { return vraddhn_u16(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vraddhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vraddhn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> [[VRADDHN_V_I]], <4 x i32> [[VRADDHN_V1_I]]) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VRADDHN_V2_I]] uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { return vraddhn_u32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vraddhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vraddhn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRADDHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRADDHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> [[VRADDHN_V_I]], <2 x i64> [[VRADDHN_V1_I]]) #4 +// CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRADDHN_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VRADDHN_V2_I]] uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { return vraddhn_u64(a, b); } - -// CHECK-LABEL: define <2 x float> @test_vrecpe_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vrecpe_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> [[VRECPE_V_I]]) #4 +// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a) #4 // CHECK: ret <2 x float> [[VRECPE_V1_I]] float32x2_t test_vrecpe_f32(float32x2_t a) { return vrecpe_f32(a); } -// CHECK-LABEL: define <2 x i32> @test_vrecpe_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vrecpe_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VRECPE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> [[VRECPE_V_I]]) #4 +// CHECK: [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a) #4 // CHECK: ret <2 x i32> [[VRECPE_V1_I]] uint32x2_t test_vrecpe_u32(uint32x2_t a) { return vrecpe_u32(a); } -// CHECK-LABEL: define <4 x float> @test_vrecpeq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vrecpeq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> [[VRECPEQ_V_I]]) #4 +// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a) #4 // CHECK: ret <4 x float> [[VRECPEQ_V1_I]] float32x4_t test_vrecpeq_f32(float32x4_t a) { return vrecpeq_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vrecpeq_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrecpeq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VRECPEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> [[VRECPEQ_V_I]]) #4 +// CHECK: [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a) #4 // CHECK: ret <4 x i32> [[VRECPEQ_V1_I]] uint32x4_t test_vrecpeq_u32(uint32x4_t a) { return vrecpeq_u32(a); } - -// CHECK-LABEL: define <2 x float> @test_vrecps_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vrecps_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VRECPS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRECPS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> [[VRECPS_V_I]], <2 x float> [[VRECPS_V1_I]]) #4 +// CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRECPS_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: ret <2 x float> [[VRECPS_V2_I]] float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) { return vrecps_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vrecpsq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vrecpsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VRECPSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRECPSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> [[VRECPSQ_V_I]], <4 x float> [[VRECPSQ_V1_I]]) #4 +// CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRECPSQ_V3_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP2]] +// CHECK: ret <4 x float> [[VRECPSQ_V2_I]] float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) { return vrecpsq_f32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_s16(int16x4_t a) { return vreinterpret_s8_s16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_s32(int32x2_t a) { return vreinterpret_s8_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_s64(int64x1_t a) { return vreinterpret_s8_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_u8( // CHECK: ret <8 x i8> %a int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) { return vreinterpret_s8_u8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) { return vreinterpret_s8_u16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) { return vreinterpret_s8_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) { return vreinterpret_s8_u64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_f16(float16x4_t a) { return vreinterpret_s8_f16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_f32(float32x2_t a) { return vreinterpret_s8_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_p8( // CHECK: ret <8 x i8> %a int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) { return vreinterpret_s8_p8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_s8_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s8_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) { return vreinterpret_s8_p16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_s8(int8x8_t a) { return vreinterpret_s16_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_s32(int32x2_t a) { return vreinterpret_s16_s32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_s64(int64x1_t a) { return vreinterpret_s16_s64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) { return vreinterpret_s16_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_u16( // CHECK: ret <4 x i16> %a int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) { return vreinterpret_s16_u16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) { return vreinterpret_s16_u32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) { return vreinterpret_s16_u64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_f16(float16x4_t a) { return vreinterpret_s16_f16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_f32(float32x2_t a) { return vreinterpret_s16_f32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) { return vreinterpret_s16_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_s16_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s16_p16( // CHECK: ret <4 x i16> %a int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) { return vreinterpret_s16_p16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_s8(int8x8_t a) { return vreinterpret_s32_s8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_s16(int16x4_t a) { return vreinterpret_s32_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_s64(int64x1_t a) { return vreinterpret_s32_s64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) { return vreinterpret_s32_u8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) { return vreinterpret_s32_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_u32( // CHECK: ret <2 x i32> %a int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) { return vreinterpret_s32_u32(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) { return vreinterpret_s32_u64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_f16(float16x4_t a) { return vreinterpret_s32_f16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_f32(float32x2_t a) { return vreinterpret_s32_f32(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) { return vreinterpret_s32_p8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_s32_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s32_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) { return vreinterpret_s32_p16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_s8(int8x8_t a) { return vreinterpret_s64_s8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_s16(int16x4_t a) { return vreinterpret_s64_s16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_s32(int32x2_t a) { return vreinterpret_s64_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) { return vreinterpret_s64_u8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) { return vreinterpret_s64_u16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) { return vreinterpret_s64_u32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_u64( // CHECK: ret <1 x i64> %a int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) { return vreinterpret_s64_u64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_f16(float16x4_t a) { return vreinterpret_s64_f16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_f32(float32x2_t a) { return vreinterpret_s64_f32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) { return vreinterpret_s64_p8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_s64_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_s64_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) { return vreinterpret_s64_p16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_s8( // CHECK: ret <8 x i8> %a uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) { return vreinterpret_u8_s8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) { return vreinterpret_u8_s16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) { return vreinterpret_u8_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) { return vreinterpret_u8_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) { return vreinterpret_u8_u16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) { return vreinterpret_u8_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) { return vreinterpret_u8_u64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) { return vreinterpret_u8_f16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) { return vreinterpret_u8_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_p8( // CHECK: ret <8 x i8> %a uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) { return vreinterpret_u8_p8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_u8_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u8_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) { return vreinterpret_u8_p16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) { return vreinterpret_u16_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_s16( // CHECK: ret <4 x i16> %a uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) { return vreinterpret_u16_s16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) { return vreinterpret_u16_s32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) { return vreinterpret_u16_s64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) { return vreinterpret_u16_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) { return vreinterpret_u16_u32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) { return vreinterpret_u16_u64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) { return vreinterpret_u16_f16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) { return vreinterpret_u16_f32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) { return vreinterpret_u16_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_u16_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u16_p16( // CHECK: ret <4 x i16> %a uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) { return vreinterpret_u16_p16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) { return vreinterpret_u32_s8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) { return vreinterpret_u32_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_s32( // CHECK: ret <2 x i32> %a uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) { return vreinterpret_u32_s32(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) { return vreinterpret_u32_s64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) { return vreinterpret_u32_u8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) { return vreinterpret_u32_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) { return vreinterpret_u32_u64(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) { return vreinterpret_u32_f16(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) { return vreinterpret_u32_f32(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) { return vreinterpret_u32_p8(a); } -// CHECK-LABEL: define <2 x i32> @test_vreinterpret_u32_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u32_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> // CHECK: ret <2 x i32> [[TMP0]] uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) { return vreinterpret_u32_p16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) { return vreinterpret_u64_s8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) { return vreinterpret_u64_s16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) { return vreinterpret_u64_s32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_s64( // CHECK: ret <1 x i64> %a uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) { return vreinterpret_u64_s64(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) { return vreinterpret_u64_u8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) { return vreinterpret_u64_u16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) { return vreinterpret_u64_u32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) { return vreinterpret_u64_f16(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) { return vreinterpret_u64_f32(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) { return vreinterpret_u64_p8(a); } -// CHECK-LABEL: define <1 x i64> @test_vreinterpret_u64_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_u64_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> // CHECK: ret <1 x i64> [[TMP0]] uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) { return vreinterpret_u64_p16(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_s8(int8x8_t a) { return vreinterpret_f16_s8(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_s16(int16x4_t a) { return vreinterpret_f16_s16(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_s32(int32x2_t a) { return vreinterpret_f16_s32(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_s64(int64x1_t a) { return vreinterpret_f16_s64(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) { return vreinterpret_f16_u8(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) { return vreinterpret_f16_u16(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) { return vreinterpret_f16_u32(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) { return vreinterpret_f16_u64(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_f32(float32x2_t a) { return vreinterpret_f16_f32(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) { return vreinterpret_f16_p8(a); } -// CHECK-LABEL: define <4 x half> @test_vreinterpret_f16_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f16_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> // CHECK: ret <4 x half> [[TMP0]] float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) { return vreinterpret_f16_p16(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_s8(int8x8_t a) { return vreinterpret_f32_s8(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_s16(int16x4_t a) { return vreinterpret_f32_s16(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_s32(int32x2_t a) { return vreinterpret_f32_s32(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_s64(int64x1_t a) { return vreinterpret_f32_s64(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) { return vreinterpret_f32_u8(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) { return vreinterpret_f32_u16(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) { return vreinterpret_f32_u32(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) { return vreinterpret_f32_u64(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_f16(float16x4_t a) { return vreinterpret_f32_f16(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) { return vreinterpret_f32_p8(a); } -// CHECK-LABEL: define <2 x float> @test_vreinterpret_f32_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_f32_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> // CHECK: ret <2 x float> [[TMP0]] float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) { return vreinterpret_f32_p16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_s8( // CHECK: ret <8 x i8> %a poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) { return vreinterpret_p8_s8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) { return vreinterpret_p8_s16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) { return vreinterpret_p8_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) { return vreinterpret_p8_s64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_u8( // CHECK: ret <8 x i8> %a poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) { return vreinterpret_p8_u8(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) { return vreinterpret_p8_u16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) { return vreinterpret_p8_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) { return vreinterpret_p8_u64(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) { return vreinterpret_p8_f16(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) { return vreinterpret_p8_f32(a); } -// CHECK-LABEL: define <8 x i8> @test_vreinterpret_p8_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p8_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: ret <8 x i8> [[TMP0]] poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) { return vreinterpret_p8_p16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_s8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) { return vreinterpret_p16_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_s16( // CHECK: ret <4 x i16> %a poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) { return vreinterpret_p16_s16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) { return vreinterpret_p16_s32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) { return vreinterpret_p16_s64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_u8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) { return vreinterpret_p16_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_u16( // CHECK: ret <4 x i16> %a poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) { return vreinterpret_p16_u16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) { return vreinterpret_p16_u32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) { return vreinterpret_p16_u64(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f16(<4 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_f16( // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) { return vreinterpret_p16_f16(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) { return vreinterpret_p16_f32(a); } -// CHECK-LABEL: define <4 x i16> @test_vreinterpret_p16_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpret_p16_p8( // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> // CHECK: ret <4 x i16> [[TMP0]] poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) { return vreinterpret_p16_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) { return vreinterpretq_s8_s16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) { return vreinterpretq_s8_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) { return vreinterpretq_s8_s64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_u8( // CHECK: ret <16 x i8> %a int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) { return vreinterpretq_s8_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) { return vreinterpretq_s8_u16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) { return vreinterpretq_s8_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) { return vreinterpretq_s8_u64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) { return vreinterpretq_s8_f16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) { return vreinterpretq_s8_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_p8( // CHECK: ret <16 x i8> %a int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) { return vreinterpretq_s8_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_s8_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s8_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) { return vreinterpretq_s8_p16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) { return vreinterpretq_s16_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) { return vreinterpretq_s16_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) { return vreinterpretq_s16_s64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) { return vreinterpretq_s16_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_u16( // CHECK: ret <8 x i16> %a int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) { return vreinterpretq_s16_u16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) { return vreinterpretq_s16_u32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) { return vreinterpretq_s16_u64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) { return vreinterpretq_s16_f16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) { return vreinterpretq_s16_f32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) { return vreinterpretq_s16_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_s16_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s16_p16( // CHECK: ret <8 x i16> %a int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) { return vreinterpretq_s16_p16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) { return vreinterpretq_s32_s8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) { return vreinterpretq_s32_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) { return vreinterpretq_s32_s64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) { return vreinterpretq_s32_u8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) { return vreinterpretq_s32_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_u32( // CHECK: ret <4 x i32> %a int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) { return vreinterpretq_s32_u32(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) { return vreinterpretq_s32_u64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) { return vreinterpretq_s32_f16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) { return vreinterpretq_s32_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) { return vreinterpretq_s32_p8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_s32_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s32_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) { return vreinterpretq_s32_p16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) { return vreinterpretq_s64_s8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) { return vreinterpretq_s64_s16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) { return vreinterpretq_s64_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) { return vreinterpretq_s64_u8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) { return vreinterpretq_s64_u16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) { return vreinterpretq_s64_u32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_u64( // CHECK: ret <2 x i64> %a int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) { return vreinterpretq_s64_u64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) { return vreinterpretq_s64_f16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) { return vreinterpretq_s64_f32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) { return vreinterpretq_s64_p8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_s64_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_s64_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) { return vreinterpretq_s64_p16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_s8( // CHECK: ret <16 x i8> %a uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) { return vreinterpretq_u8_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) { return vreinterpretq_u8_s16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) { return vreinterpretq_u8_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) { return vreinterpretq_u8_s64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) { return vreinterpretq_u8_u16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) { return vreinterpretq_u8_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) { return vreinterpretq_u8_u64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) { return vreinterpretq_u8_f16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) { return vreinterpretq_u8_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_p8( // CHECK: ret <16 x i8> %a uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) { return vreinterpretq_u8_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_u8_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u8_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) { return vreinterpretq_u8_p16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) { return vreinterpretq_u16_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_s16( // CHECK: ret <8 x i16> %a uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) { return vreinterpretq_u16_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) { return vreinterpretq_u16_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) { return vreinterpretq_u16_s64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) { return vreinterpretq_u16_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) { return vreinterpretq_u16_u32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) { return vreinterpretq_u16_u64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) { return vreinterpretq_u16_f16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) { return vreinterpretq_u16_f32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) { return vreinterpretq_u16_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_u16_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u16_p16( // CHECK: ret <8 x i16> %a uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) { return vreinterpretq_u16_p16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) { return vreinterpretq_u32_s8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) { return vreinterpretq_u32_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_s32( // CHECK: ret <4 x i32> %a uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) { return vreinterpretq_u32_s32(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) { return vreinterpretq_u32_s64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) { return vreinterpretq_u32_u8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) { return vreinterpretq_u32_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) { return vreinterpretq_u32_u64(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) { return vreinterpretq_u32_f16(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) { return vreinterpretq_u32_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) { return vreinterpretq_u32_p8(a); } -// CHECK-LABEL: define <4 x i32> @test_vreinterpretq_u32_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u32_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32> // CHECK: ret <4 x i32> [[TMP0]] uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) { return vreinterpretq_u32_p16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) { return vreinterpretq_u64_s8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) { return vreinterpretq_u64_s16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) { return vreinterpretq_u64_s32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_s64( // CHECK: ret <2 x i64> %a uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) { return vreinterpretq_u64_s64(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) { return vreinterpretq_u64_u8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) { return vreinterpretq_u64_u16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) { return vreinterpretq_u64_u32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) { return vreinterpretq_u64_f16(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) { return vreinterpretq_u64_f32(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) { return vreinterpretq_u64_p8(a); } -// CHECK-LABEL: define <2 x i64> @test_vreinterpretq_u64_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_u64_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64> // CHECK: ret <2 x i64> [[TMP0]] uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) { return vreinterpretq_u64_p16(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) { return vreinterpretq_f16_s8(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) { return vreinterpretq_f16_s16(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) { return vreinterpretq_f16_s32(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) { return vreinterpretq_f16_s64(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) { return vreinterpretq_f16_u8(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) { return vreinterpretq_f16_u16(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) { return vreinterpretq_f16_u32(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) { return vreinterpretq_f16_u64(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) { return vreinterpretq_f16_f32(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) { return vreinterpretq_f16_p8(a); } -// CHECK-LABEL: define <8 x half> @test_vreinterpretq_f16_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f16_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half> // CHECK: ret <8 x half> [[TMP0]] float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) { return vreinterpretq_f16_p16(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) { return vreinterpretq_f32_s8(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) { return vreinterpretq_f32_s16(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) { return vreinterpretq_f32_s32(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) { return vreinterpretq_f32_s64(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) { return vreinterpretq_f32_u8(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) { return vreinterpretq_f32_u16(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) { return vreinterpretq_f32_u32(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) { return vreinterpretq_f32_u64(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) { return vreinterpretq_f32_f16(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) { return vreinterpretq_f32_p8(a); } -// CHECK-LABEL: define <4 x float> @test_vreinterpretq_f32_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_f32_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float> // CHECK: ret <4 x float> [[TMP0]] float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) { return vreinterpretq_f32_p16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_s8( // CHECK: ret <16 x i8> %a poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) { return vreinterpretq_p8_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) { return vreinterpretq_p8_s16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) { return vreinterpretq_p8_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) { return vreinterpretq_p8_s64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_u8( // CHECK: ret <16 x i8> %a poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) { return vreinterpretq_p8_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) { return vreinterpretq_p8_u16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) { return vreinterpretq_p8_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) { return vreinterpretq_p8_u64(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) { return vreinterpretq_p8_f16(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) { return vreinterpretq_p8_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vreinterpretq_p8_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p8_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: ret <16 x i8> [[TMP0]] poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) { return vreinterpretq_p8_p16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_s8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) { return vreinterpretq_p16_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_s16( // CHECK: ret <8 x i16> %a poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) { return vreinterpretq_p16_s16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) { return vreinterpretq_p16_s32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) { return vreinterpretq_p16_s64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_u8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) { return vreinterpretq_p16_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_u16( // CHECK: ret <8 x i16> %a poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) { return vreinterpretq_p16_u16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) { return vreinterpretq_p16_u32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) { return vreinterpretq_p16_u64(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f16(<8 x half> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_f16( // CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) { return vreinterpretq_p16_f16(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) { return vreinterpretq_p16_f32(a); } -// CHECK-LABEL: define <8 x i16> @test_vreinterpretq_p16_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vreinterpretq_p16_p8( // CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16> // CHECK: ret <8 x i16> [[TMP0]] poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) { return vreinterpretq_p16_p8(a); } - -// CHECK-LABEL: define <8 x i8> @test_vrev16_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] int8x8_t test_vrev16_s8(int8x8_t a) { return vrev16_s8(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev16_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] uint8x8_t test_vrev16_u8(uint8x8_t a) { return vrev16_u8(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev16_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] poly8x8_t test_vrev16_p8(poly8x8_t a) { return vrev16_p8(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev16q_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16q_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] int8x16_t test_vrev16q_s8(int8x16_t a) { return vrev16q_s8(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev16q_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16q_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] uint8x16_t test_vrev16q_u8(uint8x16_t a) { return vrev16q_u8(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev16q_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev16q_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] poly8x16_t test_vrev16q_p8(poly8x16_t a) { return vrev16q_p8(a); } - -// CHECK-LABEL: define <8 x i8> @test_vrev32_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] int8x8_t test_vrev32_s8(int8x8_t a) { return vrev32_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev32_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] int16x4_t test_vrev32_s16(int16x4_t a) { return vrev32_s16(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev32_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] uint8x8_t test_vrev32_u8(uint8x8_t a) { return vrev32_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev32_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] uint16x4_t test_vrev32_u16(uint16x4_t a) { return vrev32_u16(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev32_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] poly8x8_t test_vrev32_p8(poly8x8_t a) { return vrev32_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev32_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] poly16x4_t test_vrev32_p16(poly16x4_t a) { return vrev32_p16(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev32q_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32q_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] int8x16_t test_vrev32q_s8(int8x16_t a) { return vrev32q_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev32q_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32q_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] int16x8_t test_vrev32q_s16(int16x8_t a) { return vrev32q_s16(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev32q_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32q_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] uint8x16_t test_vrev32q_u8(uint8x16_t a) { return vrev32q_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev32q_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32q_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] uint16x8_t test_vrev32q_u16(uint16x8_t a) { return vrev32q_u16(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev32q_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev32q_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] poly8x16_t test_vrev32q_p8(poly8x16_t a) { return vrev32q_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev32q_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev32q_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] poly16x8_t test_vrev32q_p16(poly16x8_t a) { return vrev32q_p16(a); } - -// CHECK-LABEL: define <8 x i8> @test_vrev64_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] int8x8_t test_vrev64_s8(int8x8_t a) { return vrev64_s8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev64_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] int16x4_t test_vrev64_s16(int16x4_t a) { return vrev64_s16(a); } -// CHECK-LABEL: define <2 x i32> @test_vrev64_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vrev64_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] int32x2_t test_vrev64_s32(int32x2_t a) { return vrev64_s32(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev64_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] uint8x8_t test_vrev64_u8(uint8x8_t a) { return vrev64_u8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev64_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] uint16x4_t test_vrev64_u16(uint16x4_t a) { return vrev64_u16(a); } -// CHECK-LABEL: define <2 x i32> @test_vrev64_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vrev64_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> // CHECK: ret <2 x i32> [[SHUFFLE_I]] uint32x2_t test_vrev64_u32(uint32x2_t a) { return vrev64_u32(a); } -// CHECK-LABEL: define <8 x i8> @test_vrev64_p8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> // CHECK: ret <8 x i8> [[SHUFFLE_I]] poly8x8_t test_vrev64_p8(poly8x8_t a) { return vrev64_p8(a); } -// CHECK-LABEL: define <4 x i16> @test_vrev64_p16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> // CHECK: ret <4 x i16> [[SHUFFLE_I]] poly16x4_t test_vrev64_p16(poly16x4_t a) { return vrev64_p16(a); } -// CHECK-LABEL: define <2 x float> @test_vrev64_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vrev64_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> // CHECK: ret <2 x float> [[SHUFFLE_I]] float32x2_t test_vrev64_f32(float32x2_t a) { return vrev64_f32(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev64q_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64q_s8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] int8x16_t test_vrev64q_s8(int8x16_t a) { return vrev64q_s8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev64q_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64q_s16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] int16x8_t test_vrev64q_s16(int16x8_t a) { return vrev64q_s16(a); } -// CHECK-LABEL: define <4 x i32> @test_vrev64q_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrev64q_s32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] int32x4_t test_vrev64q_s32(int32x4_t a) { return vrev64q_s32(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev64q_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64q_u8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] uint8x16_t test_vrev64q_u8(uint8x16_t a) { return vrev64q_u8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev64q_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64q_u16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] uint16x8_t test_vrev64q_u16(uint16x8_t a) { return vrev64q_u16(a); } -// CHECK-LABEL: define <4 x i32> @test_vrev64q_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrev64q_u32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> // CHECK: ret <4 x i32> [[SHUFFLE_I]] uint32x4_t test_vrev64q_u32(uint32x4_t a) { return vrev64q_u32(a); } -// CHECK-LABEL: define <16 x i8> @test_vrev64q_p8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrev64q_p8( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> // CHECK: ret <16 x i8> [[SHUFFLE_I]] poly8x16_t test_vrev64q_p8(poly8x16_t a) { return vrev64q_p8(a); } -// CHECK-LABEL: define <8 x i16> @test_vrev64q_p16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrev64q_p16( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> // CHECK: ret <8 x i16> [[SHUFFLE_I]] poly16x8_t test_vrev64q_p16(poly16x8_t a) { return vrev64q_p16(a); } -// CHECK-LABEL: define <4 x float> @test_vrev64q_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vrev64q_f32( // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> // CHECK: ret <4 x float> [[SHUFFLE_I]] float32x4_t test_vrev64q_f32(float32x4_t a) { return vrev64q_f32(a); } - -// CHECK-LABEL: define <8 x i8> @test_vrhadd_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vrhadd_s8( // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VRHADD_V_I]] int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) { return vrhadd_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vrhadd_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vrhadd_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VRHADD_V2_I]] int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) { return vrhadd_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vrhadd_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vrhadd_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VRHADD_V2_I]] int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) { return vrhadd_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vrhadd_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vrhadd_u8( // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VRHADD_V_I]] uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) { return vrhadd_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vrhadd_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vrhadd_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> [[VRHADD_V_I]], <4 x i16> [[VRHADD_V1_I]]) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VRHADD_V2_I]] uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) { return vrhadd_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vrhadd_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vrhadd_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VRHADD_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRHADD_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> [[VRHADD_V_I]], <2 x i32> [[VRHADD_V1_I]]) #4 +// CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRHADD_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VRHADD_V2_I]] uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) { return vrhadd_u32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vrhaddq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vrhaddq_s8( // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]] int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) { return vrhaddq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vrhaddq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vrhaddq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]] int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) { return vrhaddq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vrhaddq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vrhaddq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]] int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) { return vrhaddq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vrhaddq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vrhaddq_u8( // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]] uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) { return vrhaddq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vrhaddq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vrhaddq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> [[VRHADDQ_V_I]], <8 x i16> [[VRHADDQ_V1_I]]) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VRHADDQ_V2_I]] uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) { return vrhaddq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vrhaddq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vrhaddq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRHADDQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRHADDQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> [[VRHADDQ_V_I]], <4 x i32> [[VRHADDQ_V1_I]]) #4 +// CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRHADDQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VRHADDQ_V2_I]] uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) { return vrhaddq_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vrshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vrshl_s8( // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VRSHL_V_I]] int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { return vrshl_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vrshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vrshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VRSHL_V2_I]] int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { return vrshl_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vrshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vrshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VRSHL_V2_I]] int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { return vrshl_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vrshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vrshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VRSHL_V2_I]] int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { return vrshl_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vrshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vrshl_u8( // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VRSHL_V_I]] uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { return vrshl_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vrshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vrshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHL_V_I]], <4 x i16> [[VRSHL_V1_I]]) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VRSHL_V2_I]] uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { return vrshl_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vrshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vrshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHL_V_I]], <2 x i32> [[VRSHL_V1_I]]) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VRSHL_V2_I]] uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { return vrshl_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vrshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vrshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VRSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VRSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHL_V_I]], <1 x i64> [[VRSHL_V1_I]]) #4 +// CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSHL_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VRSHL_V2_I]] uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { return vrshl_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vrshlq_s8( // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]] int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { return vrshlq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vrshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]] int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { return vrshlq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vrshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]] int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { return vrshlq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vrshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vrshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]] int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { return vrshlq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vrshlq_u8( // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]] uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { return vrshlq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vrshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHLQ_V_I]], <8 x i16> [[VRSHLQ_V1_I]]) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VRSHLQ_V2_I]] uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { return vrshlq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vrshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHLQ_V_I]], <4 x i32> [[VRSHLQ_V1_I]]) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VRSHLQ_V2_I]] uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { return vrshlq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vrshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vrshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHLQ_V_I]], <2 x i64> [[VRSHLQ_V1_I]]) #4 +// CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSHLQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VRSHLQ_V2_I]] uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { return vrshlq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> ) @@ -16066,7 +13907,7 @@ int8x8_t test_vrshrn_n_s16(int16x8_t a) { return vrshrn_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> ) @@ -16075,7 +13916,7 @@ int16x4_t test_vrshrn_n_s32(int32x4_t a) { return vrshrn_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> ) @@ -16084,7 +13925,7 @@ int32x2_t test_vrshrn_n_s64(int64x2_t a) { return vrshrn_n_s64(a, 1); } -// CHECK-LABEL: define <8 x i8> @test_vrshrn_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> ) @@ -16093,7 +13934,7 @@ uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { return vrshrn_n_u16(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vrshrn_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> ) @@ -16102,7 +13943,7 @@ uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { return vrshrn_n_u32(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vrshrn_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vrshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> ) @@ -16111,15 +13952,14 @@ uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { return vrshrn_n_u64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vrshr_n_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrshr_n_s8( // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> ) // CHECK: ret <8 x i8> [[VRSHR_N]] int8x8_t test_vrshr_n_s8(int8x8_t a) { return vrshr_n_s8(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vrshr_n_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrshr_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) @@ -16128,7 +13968,7 @@ int16x4_t test_vrshr_n_s16(int16x4_t a) { return vrshr_n_s16(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vrshr_n_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vrshr_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) @@ -16137,7 +13977,7 @@ int32x2_t test_vrshr_n_s32(int32x2_t a) { return vrshr_n_s32(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vrshr_n_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vrshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) @@ -16146,14 +13986,14 @@ int64x1_t test_vrshr_n_s64(int64x1_t a) { return vrshr_n_s64(a, 1); } -// CHECK-LABEL: define <8 x i8> @test_vrshr_n_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vrshr_n_u8( // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> ) // CHECK: ret <8 x i8> [[VRSHR_N]] uint8x8_t test_vrshr_n_u8(uint8x8_t a) { return vrshr_n_u8(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vrshr_n_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vrshr_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> ) @@ -16162,7 +14002,7 @@ uint16x4_t test_vrshr_n_u16(uint16x4_t a) { return vrshr_n_u16(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vrshr_n_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vrshr_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> ) @@ -16171,7 +14011,7 @@ uint32x2_t test_vrshr_n_u32(uint32x2_t a) { return vrshr_n_u32(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vrshr_n_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vrshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> ) @@ -16180,14 +14020,14 @@ uint64x1_t test_vrshr_n_u64(uint64x1_t a) { return vrshr_n_u64(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrshrq_n_s8( // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> ) // CHECK: ret <16 x i8> [[VRSHR_N]] int8x16_t test_vrshrq_n_s8(int8x16_t a) { return vrshrq_n_s8(a, 1); } -// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrshrq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) @@ -16196,7 +14036,7 @@ int16x8_t test_vrshrq_n_s16(int16x8_t a) { return vrshrq_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrshrq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) @@ -16205,7 +14045,7 @@ int32x4_t test_vrshrq_n_s32(int32x4_t a) { return vrshrq_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vrshrq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) @@ -16214,14 +14054,14 @@ int64x2_t test_vrshrq_n_s64(int64x2_t a) { return vrshrq_n_s64(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vrshrq_n_u8( // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> ) // CHECK: ret <16 x i8> [[VRSHR_N]] uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { return vrshrq_n_u8(a, 1); } -// CHECK-LABEL: define <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vrshrq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> ) @@ -16230,7 +14070,7 @@ uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { return vrshrq_n_u16(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrshrq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> ) @@ -16239,7 +14079,7 @@ uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { return vrshrq_n_u32(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vrshrq_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vrshrq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> ) @@ -16248,72 +14088,59 @@ uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { return vrshrq_n_u64(a, 1); } - -// CHECK-LABEL: define <2 x float> @test_vrsqrte_f32(<2 x float> %a) #0 { +// CHECK-LABEL: @test_vrsqrte_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> [[VRSQRTE_V_I]]) #4 +// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a) #4 // CHECK: ret <2 x float> [[VRSQRTE_V1_I]] float32x2_t test_vrsqrte_f32(float32x2_t a) { return vrsqrte_f32(a); } -// CHECK-LABEL: define <2 x i32> @test_vrsqrte_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vrsqrte_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[VRSQRTE_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> [[VRSQRTE_V_I]]) #4 +// CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a) #4 // CHECK: ret <2 x i32> [[VRSQRTE_V1_I]] uint32x2_t test_vrsqrte_u32(uint32x2_t a) { return vrsqrte_u32(a); } -// CHECK-LABEL: define <4 x float> @test_vrsqrteq_f32(<4 x float> %a) #0 { +// CHECK-LABEL: @test_vrsqrteq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> [[VRSQRTEQ_V_I]]) #4 +// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a) #4 // CHECK: ret <4 x float> [[VRSQRTEQ_V1_I]] float32x4_t test_vrsqrteq_f32(float32x4_t a) { return vrsqrteq_f32(a); } -// CHECK-LABEL: define <4 x i32> @test_vrsqrteq_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vrsqrteq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[VRSQRTEQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> [[VRSQRTEQ_V_I]]) #4 +// CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a) #4 // CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]] uint32x4_t test_vrsqrteq_u32(uint32x4_t a) { return vrsqrteq_u32(a); } - -// CHECK-LABEL: define <2 x float> @test_vrsqrts_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vrsqrts_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[VRSQRTS_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VRSQRTS_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> [[VRSQRTS_V_I]], <2 x float> [[VRSQRTS_V1_I]]) #4 +// CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %b) #4 // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSQRTS_V3_I]] to <2 x float> -// CHECK: ret <2 x float> [[TMP2]] +// CHECK: ret <2 x float> [[VRSQRTS_V2_I]] float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) { return vrsqrts_f32(a, b); } -// CHECK-LABEL: define <4 x float> @test_vrsqrtsq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vrsqrtsq_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[VRSQRTSQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VRSQRTSQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> [[VRSQRTSQ_V_I]], <4 x float> [[VRSQRTSQ_V1_I]]) #4 +// CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %a, <4 x float> %b) #4 // CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VRSQRTSQ_V3_I]] to <4 x float> -// CHECK: ret <4 x float> [[TMP2]] +// CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]] float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) { return vrsqrtsq_f32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vrsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vrsra_n_s8( // CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> ) // CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]] // CHECK: ret <8 x i8> [[VRSRA_N]] @@ -16321,7 +14148,7 @@ int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { return vrsra_n_s8(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vrsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vrsra_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -16333,7 +14160,7 @@ int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { return vrsra_n_s16(a, b, 1); } -// CHECK-LABEL: define <2 x i32> @test_vrsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vrsra_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -16345,7 +14172,7 @@ int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { return vrsra_n_s32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vrsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vrsra_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -16357,7 +14184,7 @@ int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { return vrsra_n_s64(a, b, 1); } -// CHECK-LABEL: define <8 x i8> @test_vrsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vrsra_n_u8( // CHECK: [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> ) // CHECK: [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]] // CHECK: ret <8 x i8> [[VRSRA_N]] @@ -16365,7 +14192,7 @@ uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { return vrsra_n_u8(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vrsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vrsra_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -16377,7 +14204,7 @@ uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { return vrsra_n_u16(a, b, 1); } -// CHECK-LABEL: define <2 x i32> @test_vrsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vrsra_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -16389,7 +14216,7 @@ uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { return vrsra_n_u32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vrsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vrsra_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -16401,7 +14228,7 @@ uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { return vrsra_n_u64(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vrsraq_n_s8( // CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> ) // CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]] // CHECK: ret <16 x i8> [[VRSRA_N]] @@ -16409,7 +14236,7 @@ int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { return vrsraq_n_s8(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vrsraq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -16421,7 +14248,7 @@ int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { return vrsraq_n_s16(a, b, 1); } -// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vrsraq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -16433,7 +14260,7 @@ int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { return vrsraq_n_s32(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vrsraq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -16445,7 +14272,7 @@ int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { return vrsraq_n_s64(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vrsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vrsraq_n_u8( // CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> ) // CHECK: [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]] // CHECK: ret <16 x i8> [[VRSRA_N]] @@ -16453,7 +14280,7 @@ uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { return vrsraq_n_u8(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vrsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vrsraq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -16465,7 +14292,7 @@ uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { return vrsraq_n_u16(a, b, 1); } -// CHECK-LABEL: define <4 x i32> @test_vrsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vrsraq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -16477,7 +14304,7 @@ uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { return vrsraq_n_u32(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vrsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vrsraq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -16489,90 +14316,72 @@ uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) { return vrsraq_n_u64(a, b, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vrsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vrsubhn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { return vrsubhn_s16(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vrsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vrsubhn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]] int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { return vrsubhn_s32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vrsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vrsubhn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]] int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { return vrsubhn_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vrsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vrsubhn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> [[VRSUBHN_V_I]], <8 x i16> [[VRSUBHN_V1_I]]) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { return vrsubhn_u16(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vrsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vrsubhn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> [[VRSUBHN_V_I]], <4 x i32> [[VRSUBHN_V1_I]]) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VRSUBHN_V2_I]] uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { return vrsubhn_u32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vrsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vrsubhn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VRSUBHN_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VRSUBHN_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> [[VRSUBHN_V_I]], <2 x i64> [[VRSUBHN_V1_I]]) #4 +// CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VRSUBHN_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VRSUBHN_V2_I]] uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { return vrsubhn_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vset_lane_u8(i8 zeroext %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vset_lane_u8( // CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7 // CHECK: ret <8 x i8> [[VSET_LANE]] uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) { return vset_lane_u8(a, b, 7); } -// CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 zeroext %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vset_lane_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 @@ -16581,7 +14390,7 @@ uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) { return vset_lane_u16(a, b, 3); } -// CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vset_lane_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1 @@ -16590,14 +14399,14 @@ uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) { return vset_lane_u32(a, b, 1); } -// CHECK-LABEL: define <8 x i8> @test_vset_lane_s8(i8 signext %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vset_lane_s8( // CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7 // CHECK: ret <8 x i8> [[VSET_LANE]] int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) { return vset_lane_s8(a, b, 7); } -// CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 signext %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vset_lane_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 @@ -16606,7 +14415,7 @@ int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) { return vset_lane_s16(a, b, 3); } -// CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vset_lane_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1 @@ -16615,14 +14424,14 @@ int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) { return vset_lane_s32(a, b, 1); } -// CHECK-LABEL: define <8 x i8> @test_vset_lane_p8(i8 signext %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vset_lane_p8( // CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7 // CHECK: ret <8 x i8> [[VSET_LANE]] poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) { return vset_lane_p8(a, b, 7); } -// CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 signext %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vset_lane_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 @@ -16631,7 +14440,7 @@ poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) { return vset_lane_p16(a, b, 3); } -// CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vset_lane_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1 @@ -16640,7 +14449,7 @@ float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) { return vset_lane_f32(a, b, 1); } -// CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 { +// CHECK-LABEL: @test_vset_lane_f16( // CHECK: [[__REINT_246:%.*]] = alloca half, align 2 // CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8 // CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8 @@ -16662,14 +14471,14 @@ float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) { return vset_lane_f16(*a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 zeroext %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_u8( // CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15 // CHECK: ret <16 x i8> [[VSET_LANE]] uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) { return vsetq_lane_u8(a, b, 15); } -// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 zeroext %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 @@ -16678,7 +14487,7 @@ uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) { return vsetq_lane_u16(a, b, 7); } -// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3 @@ -16687,14 +14496,14 @@ uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) { return vsetq_lane_u32(a, b, 3); } -// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 signext %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_s8( // CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15 // CHECK: ret <16 x i8> [[VSET_LANE]] int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) { return vsetq_lane_s8(a, b, 15); } -// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 signext %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 @@ -16703,7 +14512,7 @@ int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) { return vsetq_lane_s16(a, b, 7); } -// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3 @@ -16712,14 +14521,14 @@ int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) { return vsetq_lane_s32(a, b, 3); } -// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 signext %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_p8( // CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15 // CHECK: ret <16 x i8> [[VSET_LANE]] poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) { return vsetq_lane_p8(a, b, 15); } -// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 signext %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 @@ -16728,7 +14537,7 @@ poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) { return vsetq_lane_p16(a, b, 7); } -// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3 @@ -16737,7 +14546,7 @@ float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { return vsetq_lane_f32(a, b, 3); } -// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_f16( // CHECK: [[__REINT_248:%.*]] = alloca half, align 2 // CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16 // CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16 @@ -16759,8 +14568,7 @@ float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) { return vsetq_lane_f16(*a, b, 3); } -// The optimizer is able to get rid of all moves now. -// CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vset_lane_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0 @@ -16769,8 +14577,7 @@ int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) { return vset_lane_s64(a, b, 0); } -// The optimizer is able to get rid of all moves now. -// CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vset_lane_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0 @@ -16779,7 +14586,7 @@ uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) { return vset_lane_u64(a, b, 0); } -// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1 @@ -16788,7 +14595,7 @@ int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) { return vsetq_lane_s64(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsetq_lane_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1 @@ -16797,193 +14604,155 @@ uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) { return vsetq_lane_u64(a, b, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vshl_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vshl_s8( // CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VSHL_V_I]] int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { return vshl_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vshl_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vshl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VSHL_V2_I]] int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { return vshl_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vshl_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vshl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VSHL_V2_I]] int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { return vshl_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vshl_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vshl_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VSHL_V2_I]] int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { return vshl_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vshl_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vshl_u8( // CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VSHL_V_I]] uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { return vshl_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vshl_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vshl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> [[VSHL_V_I]], <4 x i16> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %a, <4 x i16> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <4 x i16> -// CHECK: ret <4 x i16> [[TMP2]] +// CHECK: ret <4 x i16> [[VSHL_V2_I]] uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { return vshl_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vshl_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vshl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> [[VSHL_V_I]], <2 x i32> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %a, <2 x i32> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <2 x i32> -// CHECK: ret <2 x i32> [[TMP2]] +// CHECK: ret <2 x i32> [[VSHL_V2_I]] uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { return vshl_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vshl_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vshl_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[VSHL_V_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSHL_V1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> -// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> [[VSHL_V_I]], <1 x i64> [[VSHL_V1_I]]) #4 +// CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %a, <1 x i64> %b) #4 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[VSHL_V3_I]] to <1 x i64> -// CHECK: ret <1 x i64> [[TMP2]] +// CHECK: ret <1 x i64> [[VSHL_V2_I]] uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { return vshl_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vshlq_s8( // CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VSHLQ_V_I]] int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { return vshlq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vshlq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VSHLQ_V2_I]] int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { return vshlq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vshlq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VSHLQ_V2_I]] int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { return vshlq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vshlq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vshlq_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VSHLQ_V2_I]] int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { return vshlq_s64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vshlq_u8( // CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b) #4 // CHECK: ret <16 x i8> [[VSHLQ_V_I]] uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { return vshlq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vshlq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> [[VSHLQ_V_I]], <8 x i16> [[VSHLQ_V1_I]]) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %a, <8 x i16> %b) #4 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <8 x i16> -// CHECK: ret <8 x i16> [[TMP2]] +// CHECK: ret <8 x i16> [[VSHLQ_V2_I]] uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { return vshlq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vshlq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> [[VSHLQ_V_I]], <4 x i32> [[VSHLQ_V1_I]]) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %a, <4 x i32> %b) #4 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <4 x i32> -// CHECK: ret <4 x i32> [[TMP2]] +// CHECK: ret <4 x i32> [[VSHLQ_V2_I]] uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { return vshlq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vshlq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vshlq_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[VSHLQ_V_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSHLQ_V1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> [[VSHLQ_V_I]], <2 x i64> [[VSHLQ_V1_I]]) #4 +// CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %a, <2 x i64> %b) #4 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[VSHLQ_V3_I]] to <2 x i64> -// CHECK: ret <2 x i64> [[TMP2]] +// CHECK: ret <2 x i64> [[VSHLQ_V2_I]] uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { return vshlq_u64(a, b); } - -// CHECK-LABEL: define <8 x i16> @test_vshll_n_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vshll_n_s8( // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], // CHECK: ret <8 x i16> [[VSHLL_N]] @@ -16991,7 +14760,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) { return vshll_n_s8(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vshll_n_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vshll_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> @@ -17001,7 +14770,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a) { return vshll_n_s16(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vshll_n_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vshll_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> @@ -17011,7 +14780,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a) { return vshll_n_s32(a, 1); } -// CHECK-LABEL: define <8 x i16> @test_vshll_n_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vshll_n_u8( // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], // CHECK: ret <8 x i16> [[VSHLL_N]] @@ -17019,7 +14788,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a) { return vshll_n_u8(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vshll_n_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vshll_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> @@ -17029,7 +14798,7 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a) { return vshll_n_u16(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vshll_n_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vshll_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> @@ -17039,15 +14808,14 @@ uint64x2_t test_vshll_n_u32(uint32x2_t a) { return vshll_n_u32(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vshl_n_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vshl_n_s8( // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, // CHECK: ret <8 x i8> [[VSHL_N]] int8x8_t test_vshl_n_s8(int8x8_t a) { return vshl_n_s8(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vshl_n_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vshl_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], @@ -17056,7 +14824,7 @@ int16x4_t test_vshl_n_s16(int16x4_t a) { return vshl_n_s16(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vshl_n_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vshl_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], @@ -17065,7 +14833,7 @@ int32x2_t test_vshl_n_s32(int32x2_t a) { return vshl_n_s32(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vshl_n_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vshl_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], @@ -17074,14 +14842,14 @@ int64x1_t test_vshl_n_s64(int64x1_t a) { return vshl_n_s64(a, 1); } -// CHECK-LABEL: define <8 x i8> @test_vshl_n_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vshl_n_u8( // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, // CHECK: ret <8 x i8> [[VSHL_N]] uint8x8_t test_vshl_n_u8(uint8x8_t a) { return vshl_n_u8(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vshl_n_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vshl_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], @@ -17090,7 +14858,7 @@ uint16x4_t test_vshl_n_u16(uint16x4_t a) { return vshl_n_u16(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vshl_n_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vshl_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], @@ -17099,7 +14867,7 @@ uint32x2_t test_vshl_n_u32(uint32x2_t a) { return vshl_n_u32(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vshl_n_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vshl_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], @@ -17108,14 +14876,14 @@ uint64x1_t test_vshl_n_u64(uint64x1_t a) { return vshl_n_u64(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vshlq_n_s8( // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, // CHECK: ret <16 x i8> [[VSHL_N]] int8x16_t test_vshlq_n_s8(int8x16_t a) { return vshlq_n_s8(a, 1); } -// CHECK-LABEL: define <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vshlq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], @@ -17124,7 +14892,7 @@ int16x8_t test_vshlq_n_s16(int16x8_t a) { return vshlq_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vshlq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], @@ -17133,7 +14901,7 @@ int32x4_t test_vshlq_n_s32(int32x4_t a) { return vshlq_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vshlq_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vshlq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], @@ -17142,14 +14910,14 @@ int64x2_t test_vshlq_n_s64(int64x2_t a) { return vshlq_n_s64(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vshlq_n_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vshlq_n_u8( // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, // CHECK: ret <16 x i8> [[VSHL_N]] uint8x16_t test_vshlq_n_u8(uint8x16_t a) { return vshlq_n_u8(a, 1); } -// CHECK-LABEL: define <8 x i16> @test_vshlq_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vshlq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], @@ -17158,7 +14926,7 @@ uint16x8_t test_vshlq_n_u16(uint16x8_t a) { return vshlq_n_u16(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vshlq_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vshlq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], @@ -17167,7 +14935,7 @@ uint32x4_t test_vshlq_n_u32(uint32x4_t a) { return vshlq_n_u32(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vshlq_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vshlq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], @@ -17176,8 +14944,7 @@ uint64x2_t test_vshlq_n_u64(uint64x2_t a) { return vshlq_n_u64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vshrn_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], @@ -17187,7 +14954,7 @@ int8x8_t test_vshrn_n_s16(int16x8_t a) { return vshrn_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vshrn_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], @@ -17197,7 +14964,7 @@ int16x4_t test_vshrn_n_s32(int32x4_t a) { return vshrn_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vshrn_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], @@ -17207,7 +14974,7 @@ int32x2_t test_vshrn_n_s64(int64x2_t a) { return vshrn_n_s64(a, 1); } -// CHECK-LABEL: define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vshrn_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], @@ -17217,7 +14984,7 @@ uint8x8_t test_vshrn_n_u16(uint16x8_t a) { return vshrn_n_u16(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vshrn_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], @@ -17227,7 +14994,7 @@ uint16x4_t test_vshrn_n_u32(uint32x4_t a) { return vshrn_n_u32(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vshrn_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], @@ -17237,15 +15004,14 @@ uint32x2_t test_vshrn_n_u64(uint64x2_t a) { return vshrn_n_u64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vshr_n_s8( // CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, // CHECK: ret <8 x i8> [[VSHR_N]] int8x8_t test_vshr_n_s8(int8x8_t a) { return vshr_n_s8(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vshr_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], @@ -17254,7 +15020,7 @@ int16x4_t test_vshr_n_s16(int16x4_t a) { return vshr_n_s16(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vshr_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], @@ -17263,7 +15029,7 @@ int32x2_t test_vshr_n_s32(int32x2_t a) { return vshr_n_s32(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vshr_n_s64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vshr_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], @@ -17272,14 +15038,14 @@ int64x1_t test_vshr_n_s64(int64x1_t a) { return vshr_n_s64(a, 1); } -// CHECK-LABEL: define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) #0 { +// CHECK-LABEL: @test_vshr_n_u8( // CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, // CHECK: ret <8 x i8> [[VSHR_N]] uint8x8_t test_vshr_n_u8(uint8x8_t a) { return vshr_n_u8(a, 1); } -// CHECK-LABEL: define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) #0 { +// CHECK-LABEL: @test_vshr_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> // CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], @@ -17288,7 +15054,7 @@ uint16x4_t test_vshr_n_u16(uint16x4_t a) { return vshr_n_u16(a, 1); } -// CHECK-LABEL: define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) #0 { +// CHECK-LABEL: @test_vshr_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> // CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], @@ -17297,7 +15063,7 @@ uint32x2_t test_vshr_n_u32(uint32x2_t a) { return vshr_n_u32(a, 1); } -// CHECK-LABEL: define <1 x i64> @test_vshr_n_u64(<1 x i64> %a) #0 { +// CHECK-LABEL: @test_vshr_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> // CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], @@ -17306,14 +15072,14 @@ uint64x1_t test_vshr_n_u64(uint64x1_t a) { return vshr_n_u64(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vshrq_n_s8( // CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, // CHECK: ret <16 x i8> [[VSHR_N]] int8x16_t test_vshrq_n_s8(int8x16_t a) { return vshrq_n_s8(a, 1); } -// CHECK-LABEL: define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vshrq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], @@ -17322,7 +15088,7 @@ int16x8_t test_vshrq_n_s16(int16x8_t a) { return vshrq_n_s16(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vshrq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], @@ -17331,7 +15097,7 @@ int32x4_t test_vshrq_n_s32(int32x4_t a) { return vshrq_n_s32(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vshrq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], @@ -17340,14 +15106,14 @@ int64x2_t test_vshrq_n_s64(int64x2_t a) { return vshrq_n_s64(a, 1); } -// CHECK-LABEL: define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) #0 { +// CHECK-LABEL: @test_vshrq_n_u8( // CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, // CHECK: ret <16 x i8> [[VSHR_N]] uint8x16_t test_vshrq_n_u8(uint8x16_t a) { return vshrq_n_u8(a, 1); } -// CHECK-LABEL: define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) #0 { +// CHECK-LABEL: @test_vshrq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> // CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], @@ -17356,7 +15122,7 @@ uint16x8_t test_vshrq_n_u16(uint16x8_t a) { return vshrq_n_u16(a, 1); } -// CHECK-LABEL: define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) #0 { +// CHECK-LABEL: @test_vshrq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], @@ -17365,7 +15131,7 @@ uint32x4_t test_vshrq_n_u32(uint32x4_t a) { return vshrq_n_u32(a, 1); } -// CHECK-LABEL: define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) #0 { +// CHECK-LABEL: @test_vshrq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> // CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], @@ -17374,15 +15140,14 @@ uint64x2_t test_vshrq_n_u64(uint64x2_t a) { return vshrq_n_u64(a, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vsli_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsli_n_s8( // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) // CHECK: ret <8 x i8> [[VSLI_N]] int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) { return vsli_n_s8(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vsli_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsli_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -17393,7 +15158,7 @@ int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { return vsli_n_s16(a, b, 1); } -// CHECK-LABEL: define <2 x i32> @test_vsli_n_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsli_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -17404,7 +15169,7 @@ int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { return vsli_n_s32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vsli_n_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsli_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -17415,14 +15180,14 @@ int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { return vsli_n_s64(a, b, 1); } -// CHECK-LABEL: define <8 x i8> @test_vsli_n_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsli_n_u8( // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) // CHECK: ret <8 x i8> [[VSLI_N]] uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) { return vsli_n_u8(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vsli_n_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsli_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -17433,7 +15198,7 @@ uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { return vsli_n_u16(a, b, 1); } -// CHECK-LABEL: define <2 x i32> @test_vsli_n_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsli_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -17444,7 +15209,7 @@ uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { return vsli_n_u32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vsli_n_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsli_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -17455,14 +15220,14 @@ uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) { return vsli_n_u64(a, b, 1); } -// CHECK-LABEL: define <8 x i8> @test_vsli_n_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsli_n_p8( // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) // CHECK: ret <8 x i8> [[VSLI_N]] poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { return vsli_n_p8(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vsli_n_p16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsli_n_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -17473,14 +15238,14 @@ poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) { return vsli_n_p16(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsliq_n_s8( // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) // CHECK: ret <16 x i8> [[VSLI_N]] int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { return vsliq_n_s8(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsliq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -17491,7 +15256,7 @@ int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { return vsliq_n_s16(a, b, 1); } -// CHECK-LABEL: define <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsliq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -17502,7 +15267,7 @@ int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { return vsliq_n_s32(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vsliq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsliq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -17513,14 +15278,14 @@ int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) { return vsliq_n_s64(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsliq_n_u8( // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) // CHECK: ret <16 x i8> [[VSLI_N]] uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { return vsliq_n_u8(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsliq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -17531,7 +15296,7 @@ uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { return vsliq_n_u16(a, b, 1); } -// CHECK-LABEL: define <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsliq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -17542,7 +15307,7 @@ uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { return vsliq_n_u32(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vsliq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsliq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -17553,14 +15318,14 @@ uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) { return vsliq_n_u64(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vsliq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsliq_n_p8( // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) // CHECK: ret <16 x i8> [[VSLI_N]] poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { return vsliq_n_p8(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vsliq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsliq_n_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -17571,8 +15336,7 @@ poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { return vsliq_n_p16(a, b, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsra_n_s8( // CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] @@ -17580,7 +15344,7 @@ int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { return vsra_n_s8(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsra_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -17592,7 +15356,7 @@ int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { return vsra_n_s16(a, b, 1); } -// CHECK-LABEL: define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsra_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -17604,7 +15368,7 @@ int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { return vsra_n_s32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vsra_n_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsra_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -17616,7 +15380,7 @@ int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { return vsra_n_s64(a, b, 1); } -// CHECK-LABEL: define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsra_n_u8( // CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] // CHECK: ret <8 x i8> [[TMP0]] @@ -17624,7 +15388,7 @@ uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { return vsra_n_u8(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsra_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -17636,7 +15400,7 @@ uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { return vsra_n_u16(a, b, 1); } -// CHECK-LABEL: define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsra_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -17648,7 +15412,7 @@ uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { return vsra_n_u32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vsra_n_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsra_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -17660,7 +15424,7 @@ uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { return vsra_n_u64(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsraq_n_s8( // CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] @@ -17668,7 +15432,7 @@ int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { return vsraq_n_s8(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsraq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -17680,7 +15444,7 @@ int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { return vsraq_n_s16(a, b, 1); } -// CHECK-LABEL: define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsraq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -17692,7 +15456,7 @@ int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { return vsraq_n_s32(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsraq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -17704,7 +15468,7 @@ int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { return vsraq_n_s64(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsraq_n_u8( // CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] // CHECK: ret <16 x i8> [[TMP0]] @@ -17712,7 +15476,7 @@ uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { return vsraq_n_u8(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsraq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -17724,7 +15488,7 @@ uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { return vsraq_n_u16(a, b, 1); } -// CHECK-LABEL: define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsraq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -17736,7 +15500,7 @@ uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { return vsraq_n_u32(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsraq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -17748,15 +15512,14 @@ uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { return vsraq_n_u64(a, b, 1); } - -// CHECK-LABEL: define <8 x i8> @test_vsri_n_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsri_n_s8( // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) // CHECK: ret <8 x i8> [[VSLI_N]] int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { return vsri_n_s8(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vsri_n_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsri_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -17767,7 +15530,7 @@ int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) { return vsri_n_s16(a, b, 1); } -// CHECK-LABEL: define <2 x i32> @test_vsri_n_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsri_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -17778,7 +15541,7 @@ int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { return vsri_n_s32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vsri_n_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsri_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -17789,14 +15552,14 @@ int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) { return vsri_n_s64(a, b, 1); } -// CHECK-LABEL: define <8 x i8> @test_vsri_n_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsri_n_u8( // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) // CHECK: ret <8 x i8> [[VSLI_N]] uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) { return vsri_n_u8(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vsri_n_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsri_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -17807,7 +15570,7 @@ uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) { return vsri_n_u16(a, b, 1); } -// CHECK-LABEL: define <2 x i32> @test_vsri_n_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsri_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> @@ -17818,7 +15581,7 @@ uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) { return vsri_n_u32(a, b, 1); } -// CHECK-LABEL: define <1 x i64> @test_vsri_n_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsri_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> @@ -17829,14 +15592,14 @@ uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) { return vsri_n_u64(a, b, 1); } -// CHECK-LABEL: define <8 x i8> @test_vsri_n_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsri_n_p8( // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> ) // CHECK: ret <8 x i8> [[VSLI_N]] poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { return vsri_n_p8(a, b, 1); } -// CHECK-LABEL: define <4 x i16> @test_vsri_n_p16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsri_n_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> @@ -17847,14 +15610,14 @@ poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) { return vsri_n_p16(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsriq_n_s8( // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) // CHECK: ret <16 x i8> [[VSLI_N]] int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { return vsriq_n_s8(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsriq_n_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -17865,7 +15628,7 @@ int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { return vsriq_n_s16(a, b, 1); } -// CHECK-LABEL: define <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsriq_n_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -17876,7 +15639,7 @@ int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { return vsriq_n_s32(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vsriq_n_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsriq_n_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -17887,14 +15650,14 @@ int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) { return vsriq_n_s64(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsriq_n_u8( // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) // CHECK: ret <16 x i8> [[VSLI_N]] uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) { return vsriq_n_u8(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsriq_n_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -17905,7 +15668,7 @@ uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) { return vsriq_n_u16(a, b, 1); } -// CHECK-LABEL: define <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsriq_n_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> @@ -17916,7 +15679,7 @@ uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) { return vsriq_n_u32(a, b, 1); } -// CHECK-LABEL: define <2 x i64> @test_vsriq_n_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsriq_n_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> @@ -17927,14 +15690,14 @@ uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) { return vsriq_n_u64(a, b, 1); } -// CHECK-LABEL: define <16 x i8> @test_vsriq_n_p8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsriq_n_p8( // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) // CHECK: ret <16 x i8> [[VSLI_N]] poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { return vsriq_n_p8(a, b, 1); } -// CHECK-LABEL: define <8 x i16> @test_vsriq_n_p16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsriq_n_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> @@ -17945,15 +15708,14 @@ poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) { return vsriq_n_p16(a, b, 1); } - -// CHECK-LABEL: define void @test_vst1q_u8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1q_u8( // CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1) // CHECK: ret void void test_vst1q_u8(uint8_t * a, uint8x16_t b) { vst1q_u8(a, b); } -// CHECK-LABEL: define void @test_vst1q_u16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1q_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -17963,7 +15725,7 @@ void test_vst1q_u16(uint16_t * a, uint16x8_t b) { vst1q_u16(a, b); } -// CHECK-LABEL: define void @test_vst1q_u32(i32* %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1q_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> @@ -17973,7 +15735,7 @@ void test_vst1q_u32(uint32_t * a, uint32x4_t b) { vst1q_u32(a, b); } -// CHECK-LABEL: define void @test_vst1q_u64(i64* %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1q_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> @@ -17983,14 +15745,14 @@ void test_vst1q_u64(uint64_t * a, uint64x2_t b) { vst1q_u64(a, b); } -// CHECK-LABEL: define void @test_vst1q_s8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1q_s8( // CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1) // CHECK: ret void void test_vst1q_s8(int8_t * a, int8x16_t b) { vst1q_s8(a, b); } -// CHECK-LABEL: define void @test_vst1q_s16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1q_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -18000,7 +15762,7 @@ void test_vst1q_s16(int16_t * a, int16x8_t b) { vst1q_s16(a, b); } -// CHECK-LABEL: define void @test_vst1q_s32(i32* %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1q_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> @@ -18010,7 +15772,7 @@ void test_vst1q_s32(int32_t * a, int32x4_t b) { vst1q_s32(a, b); } -// CHECK-LABEL: define void @test_vst1q_s64(i64* %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1q_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> @@ -18020,7 +15782,7 @@ void test_vst1q_s64(int64_t * a, int64x2_t b) { vst1q_s64(a, b); } -// CHECK-LABEL: define void @test_vst1q_f16(half* %a, <8 x half> %b) #0 { +// CHECK-LABEL: @test_vst1q_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -18030,7 +15792,7 @@ void test_vst1q_f16(float16_t * a, float16x8_t b) { vst1q_f16(a, b); } -// CHECK-LABEL: define void @test_vst1q_f32(float* %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vst1q_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> @@ -18040,14 +15802,14 @@ void test_vst1q_f32(float32_t * a, float32x4_t b) { vst1q_f32(a, b); } -// CHECK-LABEL: define void @test_vst1q_p8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1q_p8( // CHECK: call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1) // CHECK: ret void void test_vst1q_p8(poly8_t * a, poly8x16_t b) { vst1q_p8(a, b); } -// CHECK-LABEL: define void @test_vst1q_p16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1q_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -18057,14 +15819,14 @@ void test_vst1q_p16(poly16_t * a, poly16x8_t b) { vst1q_p16(a, b); } -// CHECK-LABEL: define void @test_vst1_u8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1_u8( // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1) // CHECK: ret void void test_vst1_u8(uint8_t * a, uint8x8_t b) { vst1_u8(a, b); } -// CHECK-LABEL: define void @test_vst1_u16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -18074,7 +15836,7 @@ void test_vst1_u16(uint16_t * a, uint16x4_t b) { vst1_u16(a, b); } -// CHECK-LABEL: define void @test_vst1_u32(i32* %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> @@ -18084,7 +15846,7 @@ void test_vst1_u32(uint32_t * a, uint32x2_t b) { vst1_u32(a, b); } -// CHECK-LABEL: define void @test_vst1_u64(i64* %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> @@ -18094,14 +15856,14 @@ void test_vst1_u64(uint64_t * a, uint64x1_t b) { vst1_u64(a, b); } -// CHECK-LABEL: define void @test_vst1_s8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1_s8( // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1) // CHECK: ret void void test_vst1_s8(int8_t * a, int8x8_t b) { vst1_s8(a, b); } -// CHECK-LABEL: define void @test_vst1_s16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -18111,7 +15873,7 @@ void test_vst1_s16(int16_t * a, int16x4_t b) { vst1_s16(a, b); } -// CHECK-LABEL: define void @test_vst1_s32(i32* %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> @@ -18121,7 +15883,7 @@ void test_vst1_s32(int32_t * a, int32x2_t b) { vst1_s32(a, b); } -// CHECK-LABEL: define void @test_vst1_s64(i64* %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> @@ -18131,7 +15893,7 @@ void test_vst1_s64(int64_t * a, int64x1_t b) { vst1_s64(a, b); } -// CHECK-LABEL: define void @test_vst1_f16(half* %a, <4 x half> %b) #0 { +// CHECK-LABEL: @test_vst1_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -18141,7 +15903,7 @@ void test_vst1_f16(float16_t * a, float16x4_t b) { vst1_f16(a, b); } -// CHECK-LABEL: define void @test_vst1_f32(float* %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vst1_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> @@ -18151,14 +15913,14 @@ void test_vst1_f32(float32_t * a, float32x2_t b) { vst1_f32(a, b); } -// CHECK-LABEL: define void @test_vst1_p8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1_p8( // CHECK: call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1) // CHECK: ret void void test_vst1_p8(poly8_t * a, poly8x8_t b) { vst1_p8(a, b); } -// CHECK-LABEL: define void @test_vst1_p16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -18168,8 +15930,7 @@ void test_vst1_p16(poly16_t * a, poly16x4_t b) { vst1_p16(a, b); } - -// CHECK-LABEL: define void @test_vst1q_lane_u8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_u8( // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15 // CHECK: store i8 [[TMP0]], i8* %a, align 1 // CHECK: ret void @@ -18177,7 +15938,7 @@ void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) { vst1q_lane_u8(a, b, 15); } -// CHECK-LABEL: define void @test_vst1q_lane_u16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -18189,7 +15950,7 @@ void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) { vst1q_lane_u16(a, b, 7); } -// CHECK-LABEL: define void @test_vst1q_lane_u32(i32* %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> @@ -18201,7 +15962,7 @@ void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) { vst1q_lane_u32(a, b, 3); } -// CHECK-LABEL: define void @test_vst1q_lane_u64(i64* %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> @@ -18212,7 +15973,7 @@ void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) { vst1q_lane_u64(a, b, 1); } -// CHECK-LABEL: define void @test_vst1q_lane_s8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_s8( // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15 // CHECK: store i8 [[TMP0]], i8* %a, align 1 // CHECK: ret void @@ -18220,7 +15981,7 @@ void test_vst1q_lane_s8(int8_t * a, int8x16_t b) { vst1q_lane_s8(a, b, 15); } -// CHECK-LABEL: define void @test_vst1q_lane_s16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -18232,7 +15993,7 @@ void test_vst1q_lane_s16(int16_t * a, int16x8_t b) { vst1q_lane_s16(a, b, 7); } -// CHECK-LABEL: define void @test_vst1q_lane_s32(i32* %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> @@ -18244,7 +16005,7 @@ void test_vst1q_lane_s32(int32_t * a, int32x4_t b) { vst1q_lane_s32(a, b, 3); } -// CHECK-LABEL: define void @test_vst1q_lane_s64(i64* %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> @@ -18255,7 +16016,7 @@ void test_vst1q_lane_s64(int64_t * a, int64x2_t b) { vst1q_lane_s64(a, b, 1); } -// CHECK-LABEL: define void @test_vst1q_lane_f16(half* %a, <8 x half> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -18267,7 +16028,7 @@ void test_vst1q_lane_f16(float16_t * a, float16x8_t b) { vst1q_lane_f16(a, b, 7); } -// CHECK-LABEL: define void @test_vst1q_lane_f32(float* %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> @@ -18279,7 +16040,7 @@ void test_vst1q_lane_f32(float32_t * a, float32x4_t b) { vst1q_lane_f32(a, b, 3); } -// CHECK-LABEL: define void @test_vst1q_lane_p8(i8* %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_p8( // CHECK: [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15 // CHECK: store i8 [[TMP0]], i8* %a, align 1 // CHECK: ret void @@ -18287,7 +16048,7 @@ void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) { vst1q_lane_p8(a, b, 15); } -// CHECK-LABEL: define void @test_vst1q_lane_p16(i16* %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1q_lane_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> @@ -18299,7 +16060,7 @@ void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) { vst1q_lane_p16(a, b, 7); } -// CHECK-LABEL: define void @test_vst1_lane_u8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_u8( // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 // CHECK: store i8 [[TMP0]], i8* %a, align 1 // CHECK: ret void @@ -18307,7 +16068,7 @@ void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) { vst1_lane_u8(a, b, 7); } -// CHECK-LABEL: define void @test_vst1_lane_u16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_u16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -18319,7 +16080,7 @@ void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) { vst1_lane_u16(a, b, 3); } -// CHECK-LABEL: define void @test_vst1_lane_u32(i32* %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_u32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> @@ -18331,7 +16092,7 @@ void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) { vst1_lane_u32(a, b, 1); } -// CHECK-LABEL: define void @test_vst1_lane_u64(i64* %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_u64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> @@ -18343,7 +16104,7 @@ void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) { vst1_lane_u64(a, b, 0); } -// CHECK-LABEL: define void @test_vst1_lane_s8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_s8( // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 // CHECK: store i8 [[TMP0]], i8* %a, align 1 // CHECK: ret void @@ -18351,7 +16112,7 @@ void test_vst1_lane_s8(int8_t * a, int8x8_t b) { vst1_lane_s8(a, b, 7); } -// CHECK-LABEL: define void @test_vst1_lane_s16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_s16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -18363,7 +16124,7 @@ void test_vst1_lane_s16(int16_t * a, int16x4_t b) { vst1_lane_s16(a, b, 3); } -// CHECK-LABEL: define void @test_vst1_lane_s32(i32* %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_s32( // CHECK: [[TMP0:%.*]] = bitcast i32* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> @@ -18375,7 +16136,7 @@ void test_vst1_lane_s32(int32_t * a, int32x2_t b) { vst1_lane_s32(a, b, 1); } -// CHECK-LABEL: define void @test_vst1_lane_s64(i64* %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_s64( // CHECK: [[TMP0:%.*]] = bitcast i64* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> @@ -18387,7 +16148,7 @@ void test_vst1_lane_s64(int64_t * a, int64x1_t b) { vst1_lane_s64(a, b, 0); } -// CHECK-LABEL: define void @test_vst1_lane_f16(half* %a, <4 x half> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_f16( // CHECK: [[TMP0:%.*]] = bitcast half* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -18399,7 +16160,7 @@ void test_vst1_lane_f16(float16_t * a, float16x4_t b) { vst1_lane_f16(a, b, 3); } -// CHECK-LABEL: define void @test_vst1_lane_f32(float* %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_f32( // CHECK: [[TMP0:%.*]] = bitcast float* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> @@ -18411,7 +16172,7 @@ void test_vst1_lane_f32(float32_t * a, float32x2_t b) { vst1_lane_f32(a, b, 1); } -// CHECK-LABEL: define void @test_vst1_lane_p8(i8* %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_p8( // CHECK: [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7 // CHECK: store i8 [[TMP0]], i8* %a, align 1 // CHECK: ret void @@ -18419,7 +16180,7 @@ void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) { vst1_lane_p8(a, b, 7); } -// CHECK-LABEL: define void @test_vst1_lane_p16(i16* %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vst1_lane_p16( // CHECK: [[TMP0:%.*]] = bitcast i16* %a to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> @@ -18431,8 +16192,7 @@ void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) { vst1_lane_p16(a, b, 3); } - -// CHECK-LABEL: define void @test_vst2q_u8(i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 @@ -18453,7 +16213,7 @@ void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) { vst2q_u8(a, b); } -// CHECK-LABEL: define void @test_vst2q_u16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 @@ -18479,7 +16239,7 @@ void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) { vst2q_u16(a, b); } -// CHECK-LABEL: define void @test_vst2q_u32(i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 @@ -18505,7 +16265,7 @@ void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) { vst2q_u32(a, b); } -// CHECK-LABEL: define void @test_vst2q_s8(i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 @@ -18526,7 +16286,7 @@ void test_vst2q_s8(int8_t * a, int8x16x2_t b) { vst2q_s8(a, b); } -// CHECK-LABEL: define void @test_vst2q_s16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 @@ -18552,7 +16312,7 @@ void test_vst2q_s16(int16_t * a, int16x8x2_t b) { vst2q_s16(a, b); } -// CHECK-LABEL: define void @test_vst2q_s32(i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 @@ -18578,7 +16338,7 @@ void test_vst2q_s32(int32_t * a, int32x4x2_t b) { vst2q_s32(a, b); } -// CHECK-LABEL: define void @test_vst2q_f16(half* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 @@ -18604,7 +16364,7 @@ void test_vst2q_f16(float16_t * a, float16x8x2_t b) { vst2q_f16(a, b); } -// CHECK-LABEL: define void @test_vst2q_f32(float* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 @@ -18630,7 +16390,7 @@ void test_vst2q_f32(float32_t * a, float32x4x2_t b) { vst2q_f32(a, b); } -// CHECK-LABEL: define void @test_vst2q_p8(i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 @@ -18651,7 +16411,7 @@ void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) { vst2q_p8(a, b); } -// CHECK-LABEL: define void @test_vst2q_p16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 @@ -18677,7 +16437,7 @@ void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) { vst2q_p16(a, b); } -// CHECK-LABEL: define void @test_vst2_u8(i8* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 @@ -18698,7 +16458,7 @@ void test_vst2_u8(uint8_t * a, uint8x8x2_t b) { vst2_u8(a, b); } -// CHECK-LABEL: define void @test_vst2_u16(i16* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 @@ -18724,7 +16484,7 @@ void test_vst2_u16(uint16_t * a, uint16x4x2_t b) { vst2_u16(a, b); } -// CHECK-LABEL: define void @test_vst2_u32(i32* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 @@ -18750,7 +16510,7 @@ void test_vst2_u32(uint32_t * a, uint32x2x2_t b) { vst2_u32(a, b); } -// CHECK-LABEL: define void @test_vst2_u64(i64* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_u64( // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0 @@ -18776,7 +16536,7 @@ void test_vst2_u64(uint64_t * a, uint64x1x2_t b) { vst2_u64(a, b); } -// CHECK-LABEL: define void @test_vst2_s8(i8* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 @@ -18797,7 +16557,7 @@ void test_vst2_s8(int8_t * a, int8x8x2_t b) { vst2_s8(a, b); } -// CHECK-LABEL: define void @test_vst2_s16(i16* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 @@ -18823,7 +16583,7 @@ void test_vst2_s16(int16_t * a, int16x4x2_t b) { vst2_s16(a, b); } -// CHECK-LABEL: define void @test_vst2_s32(i32* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 @@ -18849,7 +16609,7 @@ void test_vst2_s32(int32_t * a, int32x2x2_t b) { vst2_s32(a, b); } -// CHECK-LABEL: define void @test_vst2_s64(i64* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_s64( // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0 @@ -18875,7 +16635,7 @@ void test_vst2_s64(int64_t * a, int64x1x2_t b) { vst2_s64(a, b); } -// CHECK-LABEL: define void @test_vst2_f16(half* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 @@ -18901,7 +16661,7 @@ void test_vst2_f16(float16_t * a, float16x4x2_t b) { vst2_f16(a, b); } -// CHECK-LABEL: define void @test_vst2_f32(float* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 @@ -18927,7 +16687,7 @@ void test_vst2_f32(float32_t * a, float32x2x2_t b) { vst2_f32(a, b); } -// CHECK-LABEL: define void @test_vst2_p8(i8* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 @@ -18948,7 +16708,7 @@ void test_vst2_p8(poly8_t * a, poly8x8x2_t b) { vst2_p8(a, b); } -// CHECK-LABEL: define void @test_vst2_p16(i16* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 @@ -18974,8 +16734,7 @@ void test_vst2_p16(poly16_t * a, poly16x4x2_t b) { vst2_p16(a, b); } - -// CHECK-LABEL: define void @test_vst2q_lane_u16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0 @@ -19001,7 +16760,7 @@ void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) { vst2q_lane_u16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_u32(i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0 @@ -19027,7 +16786,7 @@ void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) { vst2q_lane_u32(a, b, 3); } -// CHECK-LABEL: define void @test_vst2q_lane_s16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0 @@ -19053,7 +16812,7 @@ void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) { vst2q_lane_s16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_s32(i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0 @@ -19079,7 +16838,7 @@ void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) { vst2q_lane_s32(a, b, 3); } -// CHECK-LABEL: define void @test_vst2q_lane_f16(half* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0 @@ -19105,7 +16864,7 @@ void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) { vst2q_lane_f16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2q_lane_f32(float* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0 @@ -19131,7 +16890,7 @@ void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) { vst2q_lane_f32(a, b, 3); } -// CHECK-LABEL: define void @test_vst2q_lane_p16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2q_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0 @@ -19157,7 +16916,7 @@ void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) { vst2q_lane_p16(a, b, 7); } -// CHECK-LABEL: define void @test_vst2_lane_u8(i8* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_lane_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 @@ -19178,7 +16937,7 @@ void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) { vst2_lane_u8(a, b, 7); } -// CHECK-LABEL: define void @test_vst2_lane_u16(i16* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0 @@ -19204,7 +16963,7 @@ void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) { vst2_lane_u16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_u32(i32* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0 @@ -19230,7 +16989,7 @@ void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) { vst2_lane_u32(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_s8(i8* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_lane_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 @@ -19251,7 +17010,7 @@ void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) { vst2_lane_s8(a, b, 7); } -// CHECK-LABEL: define void @test_vst2_lane_s16(i16* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0 @@ -19277,7 +17036,7 @@ void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) { vst2_lane_s16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_s32(i32* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0 @@ -19303,7 +17062,7 @@ void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) { vst2_lane_s32(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_f16(half* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0 @@ -19329,7 +17088,7 @@ void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) { vst2_lane_f16(a, b, 3); } -// CHECK-LABEL: define void @test_vst2_lane_f32(float* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0 @@ -19355,7 +17114,7 @@ void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) { vst2_lane_f32(a, b, 1); } -// CHECK-LABEL: define void @test_vst2_lane_p8(i8* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_lane_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 @@ -19376,7 +17135,7 @@ void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) { vst2_lane_p8(a, b, 7); } -// CHECK-LABEL: define void @test_vst2_lane_p16(i16* %a, [2 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst2_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0 @@ -19402,8 +17161,7 @@ void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) { vst2_lane_p16(a, b, 3); } - -// CHECK-LABEL: define void @test_vst3q_u8(i8* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 @@ -19427,7 +17185,7 @@ void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) { vst3q_u8(a, b); } -// CHECK-LABEL: define void @test_vst3q_u16(i16* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 @@ -19458,7 +17216,7 @@ void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) { vst3q_u16(a, b); } -// CHECK-LABEL: define void @test_vst3q_u32(i32* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 @@ -19489,7 +17247,7 @@ void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) { vst3q_u32(a, b); } -// CHECK-LABEL: define void @test_vst3q_s8(i8* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 @@ -19513,7 +17271,7 @@ void test_vst3q_s8(int8_t * a, int8x16x3_t b) { vst3q_s8(a, b); } -// CHECK-LABEL: define void @test_vst3q_s16(i16* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 @@ -19544,7 +17302,7 @@ void test_vst3q_s16(int16_t * a, int16x8x3_t b) { vst3q_s16(a, b); } -// CHECK-LABEL: define void @test_vst3q_s32(i32* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 @@ -19575,7 +17333,7 @@ void test_vst3q_s32(int32_t * a, int32x4x3_t b) { vst3q_s32(a, b); } -// CHECK-LABEL: define void @test_vst3q_f16(half* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 @@ -19606,7 +17364,7 @@ void test_vst3q_f16(float16_t * a, float16x8x3_t b) { vst3q_f16(a, b); } -// CHECK-LABEL: define void @test_vst3q_f32(float* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 @@ -19637,7 +17395,7 @@ void test_vst3q_f32(float32_t * a, float32x4x3_t b) { vst3q_f32(a, b); } -// CHECK-LABEL: define void @test_vst3q_p8(i8* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 @@ -19661,7 +17419,7 @@ void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) { vst3q_p8(a, b); } -// CHECK-LABEL: define void @test_vst3q_p16(i16* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 @@ -19692,7 +17450,7 @@ void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) { vst3q_p16(a, b); } -// CHECK-LABEL: define void @test_vst3_u8(i8* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 @@ -19716,7 +17474,7 @@ void test_vst3_u8(uint8_t * a, uint8x8x3_t b) { vst3_u8(a, b); } -// CHECK-LABEL: define void @test_vst3_u16(i16* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 @@ -19747,7 +17505,7 @@ void test_vst3_u16(uint16_t * a, uint16x4x3_t b) { vst3_u16(a, b); } -// CHECK-LABEL: define void @test_vst3_u32(i32* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 @@ -19778,7 +17536,7 @@ void test_vst3_u32(uint32_t * a, uint32x2x3_t b) { vst3_u32(a, b); } -// CHECK-LABEL: define void @test_vst3_u64(i64* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_u64( // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0 @@ -19809,7 +17567,7 @@ void test_vst3_u64(uint64_t * a, uint64x1x3_t b) { vst3_u64(a, b); } -// CHECK-LABEL: define void @test_vst3_s8(i8* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 @@ -19833,7 +17591,7 @@ void test_vst3_s8(int8_t * a, int8x8x3_t b) { vst3_s8(a, b); } -// CHECK-LABEL: define void @test_vst3_s16(i16* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 @@ -19864,7 +17622,7 @@ void test_vst3_s16(int16_t * a, int16x4x3_t b) { vst3_s16(a, b); } -// CHECK-LABEL: define void @test_vst3_s32(i32* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 @@ -19895,7 +17653,7 @@ void test_vst3_s32(int32_t * a, int32x2x3_t b) { vst3_s32(a, b); } -// CHECK-LABEL: define void @test_vst3_s64(i64* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_s64( // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0 @@ -19926,7 +17684,7 @@ void test_vst3_s64(int64_t * a, int64x1x3_t b) { vst3_s64(a, b); } -// CHECK-LABEL: define void @test_vst3_f16(half* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 @@ -19957,7 +17715,7 @@ void test_vst3_f16(float16_t * a, float16x4x3_t b) { vst3_f16(a, b); } -// CHECK-LABEL: define void @test_vst3_f32(float* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0 @@ -19988,7 +17746,7 @@ void test_vst3_f32(float32_t * a, float32x2x3_t b) { vst3_f32(a, b); } -// CHECK-LABEL: define void @test_vst3_p8(i8* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 @@ -20012,7 +17770,7 @@ void test_vst3_p8(poly8_t * a, poly8x8x3_t b) { vst3_p8(a, b); } -// CHECK-LABEL: define void @test_vst3_p16(i16* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0 @@ -20043,8 +17801,7 @@ void test_vst3_p16(poly16_t * a, poly16x4x3_t b) { vst3_p16(a, b); } - -// CHECK-LABEL: define void @test_vst3q_lane_u16(i16* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0 @@ -20075,7 +17832,7 @@ void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) { vst3q_lane_u16(a, b, 7); } -// CHECK-LABEL: define void @test_vst3q_lane_u32(i32* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0 @@ -20106,7 +17863,7 @@ void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) { vst3q_lane_u32(a, b, 3); } -// CHECK-LABEL: define void @test_vst3q_lane_s16(i16* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0 @@ -20137,7 +17894,7 @@ void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) { vst3q_lane_s16(a, b, 7); } -// CHECK-LABEL: define void @test_vst3q_lane_s32(i32* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0 @@ -20168,7 +17925,7 @@ void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) { vst3q_lane_s32(a, b, 3); } -// CHECK-LABEL: define void @test_vst3q_lane_f16(half* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0 @@ -20199,7 +17956,7 @@ void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) { vst3q_lane_f16(a, b, 7); } -// CHECK-LABEL: define void @test_vst3q_lane_f32(float* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0 @@ -20230,7 +17987,7 @@ void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) { vst3q_lane_f32(a, b, 3); } -// CHECK-LABEL: define void @test_vst3q_lane_p16(i16* %a, [6 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3q_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0 @@ -20261,7 +18018,7 @@ void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) { vst3q_lane_p16(a, b, 7); } -// CHECK-LABEL: define void @test_vst3_lane_u8(i8* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_lane_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 @@ -20285,7 +18042,7 @@ void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) { vst3_lane_u8(a, b, 7); } -// CHECK-LABEL: define void @test_vst3_lane_u16(i16* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0 @@ -20316,7 +18073,7 @@ void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) { vst3_lane_u16(a, b, 3); } -// CHECK-LABEL: define void @test_vst3_lane_u32(i32* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0 @@ -20347,7 +18104,7 @@ void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) { vst3_lane_u32(a, b, 1); } -// CHECK-LABEL: define void @test_vst3_lane_s8(i8* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_lane_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 @@ -20371,7 +18128,7 @@ void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) { vst3_lane_s8(a, b, 7); } -// CHECK-LABEL: define void @test_vst3_lane_s16(i16* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0 @@ -20402,7 +18159,7 @@ void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) { vst3_lane_s16(a, b, 3); } -// CHECK-LABEL: define void @test_vst3_lane_s32(i32* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0 @@ -20433,7 +18190,7 @@ void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) { vst3_lane_s32(a, b, 1); } -// CHECK-LABEL: define void @test_vst3_lane_f16(half* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0 @@ -20464,7 +18221,7 @@ void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) { vst3_lane_f16(a, b, 3); } -// CHECK-LABEL: define void @test_vst3_lane_f32(float* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0 @@ -20495,7 +18252,7 @@ void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) { vst3_lane_f32(a, b, 1); } -// CHECK-LABEL: define void @test_vst3_lane_p8(i8* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_lane_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 @@ -20519,7 +18276,7 @@ void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) { vst3_lane_p8(a, b, 7); } -// CHECK-LABEL: define void @test_vst3_lane_p16(i16* %a, [3 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst3_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0 @@ -20550,8 +18307,7 @@ void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) { vst3_lane_p16(a, b, 3); } - -// CHECK-LABEL: define void @test_vst4q_u8(i8* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 @@ -20578,7 +18334,7 @@ void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) { vst4q_u8(a, b); } -// CHECK-LABEL: define void @test_vst4q_u16(i16* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 @@ -20614,7 +18370,7 @@ void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) { vst4q_u16(a, b); } -// CHECK-LABEL: define void @test_vst4q_u32(i32* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 @@ -20650,7 +18406,7 @@ void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) { vst4q_u32(a, b); } -// CHECK-LABEL: define void @test_vst4q_s8(i8* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 @@ -20677,7 +18433,7 @@ void test_vst4q_s8(int8_t * a, int8x16x4_t b) { vst4q_s8(a, b); } -// CHECK-LABEL: define void @test_vst4q_s16(i16* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 @@ -20713,7 +18469,7 @@ void test_vst4q_s16(int16_t * a, int16x8x4_t b) { vst4q_s16(a, b); } -// CHECK-LABEL: define void @test_vst4q_s32(i32* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 @@ -20749,7 +18505,7 @@ void test_vst4q_s32(int32_t * a, int32x4x4_t b) { vst4q_s32(a, b); } -// CHECK-LABEL: define void @test_vst4q_f16(half* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 @@ -20785,7 +18541,7 @@ void test_vst4q_f16(float16_t * a, float16x8x4_t b) { vst4q_f16(a, b); } -// CHECK-LABEL: define void @test_vst4q_f32(float* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 @@ -20821,7 +18577,7 @@ void test_vst4q_f32(float32_t * a, float32x4x4_t b) { vst4q_f32(a, b); } -// CHECK-LABEL: define void @test_vst4q_p8(i8* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0 @@ -20848,7 +18604,7 @@ void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) { vst4q_p8(a, b); } -// CHECK-LABEL: define void @test_vst4q_p16(i16* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 @@ -20884,7 +18640,7 @@ void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) { vst4q_p16(a, b); } -// CHECK-LABEL: define void @test_vst4_u8(i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 @@ -20911,7 +18667,7 @@ void test_vst4_u8(uint8_t * a, uint8x8x4_t b) { vst4_u8(a, b); } -// CHECK-LABEL: define void @test_vst4_u16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0 @@ -20947,7 +18703,7 @@ void test_vst4_u16(uint16_t * a, uint16x4x4_t b) { vst4_u16(a, b); } -// CHECK-LABEL: define void @test_vst4_u32(i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0 @@ -20983,7 +18739,7 @@ void test_vst4_u32(uint32_t * a, uint32x2x4_t b) { vst4_u32(a, b); } -// CHECK-LABEL: define void @test_vst4_u64(i64* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_u64( // CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0 @@ -21019,7 +18775,7 @@ void test_vst4_u64(uint64_t * a, uint64x1x4_t b) { vst4_u64(a, b); } -// CHECK-LABEL: define void @test_vst4_s8(i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 @@ -21046,7 +18802,7 @@ void test_vst4_s8(int8_t * a, int8x8x4_t b) { vst4_s8(a, b); } -// CHECK-LABEL: define void @test_vst4_s16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0 @@ -21082,7 +18838,7 @@ void test_vst4_s16(int16_t * a, int16x4x4_t b) { vst4_s16(a, b); } -// CHECK-LABEL: define void @test_vst4_s32(i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 @@ -21118,7 +18874,7 @@ void test_vst4_s32(int32_t * a, int32x2x4_t b) { vst4_s32(a, b); } -// CHECK-LABEL: define void @test_vst4_s64(i64* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_s64( // CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0 @@ -21154,7 +18910,7 @@ void test_vst4_s64(int64_t * a, int64x1x4_t b) { vst4_s64(a, b); } -// CHECK-LABEL: define void @test_vst4_f16(half* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 @@ -21190,7 +18946,7 @@ void test_vst4_f16(float16_t * a, float16x4x4_t b) { vst4_f16(a, b); } -// CHECK-LABEL: define void @test_vst4_f32(float* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 @@ -21226,7 +18982,7 @@ void test_vst4_f32(float32_t * a, float32x2x4_t b) { vst4_f32(a, b); } -// CHECK-LABEL: define void @test_vst4_p8(i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 @@ -21253,7 +19009,7 @@ void test_vst4_p8(poly8_t * a, poly8x8x4_t b) { vst4_p8(a, b); } -// CHECK-LABEL: define void @test_vst4_p16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 @@ -21289,8 +19045,7 @@ void test_vst4_p16(poly16_t * a, poly16x4x4_t b) { vst4_p16(a, b); } - -// CHECK-LABEL: define void @test_vst4q_lane_u16(i16* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0 @@ -21326,7 +19081,7 @@ void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) { vst4q_lane_u16(a, b, 7); } -// CHECK-LABEL: define void @test_vst4q_lane_u32(i32* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0 @@ -21362,7 +19117,7 @@ void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) { vst4q_lane_u32(a, b, 3); } -// CHECK-LABEL: define void @test_vst4q_lane_s16(i16* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0 @@ -21398,7 +19153,7 @@ void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) { vst4q_lane_s16(a, b, 7); } -// CHECK-LABEL: define void @test_vst4q_lane_s32(i32* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0 @@ -21434,7 +19189,7 @@ void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) { vst4q_lane_s32(a, b, 3); } -// CHECK-LABEL: define void @test_vst4q_lane_f16(half* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0 @@ -21470,7 +19225,7 @@ void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) { vst4q_lane_f16(a, b, 7); } -// CHECK-LABEL: define void @test_vst4q_lane_f32(float* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0 @@ -21506,7 +19261,7 @@ void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) { vst4q_lane_f32(a, b, 3); } -// CHECK-LABEL: define void @test_vst4q_lane_p16(i16* %a, [8 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4q_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0 @@ -21542,7 +19297,7 @@ void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) { vst4q_lane_p16(a, b, 7); } -// CHECK-LABEL: define void @test_vst4_lane_u8(i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_lane_u8( // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 @@ -21569,7 +19324,7 @@ void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) { vst4_lane_u8(a, b, 7); } -// CHECK-LABEL: define void @test_vst4_lane_u16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_lane_u16( // CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0 @@ -21605,7 +19360,7 @@ void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) { vst4_lane_u16(a, b, 3); } -// CHECK-LABEL: define void @test_vst4_lane_u32(i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_lane_u32( // CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0 @@ -21641,7 +19396,7 @@ void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) { vst4_lane_u32(a, b, 1); } -// CHECK-LABEL: define void @test_vst4_lane_s8(i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_lane_s8( // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 @@ -21668,7 +19423,7 @@ void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) { vst4_lane_s8(a, b, 7); } -// CHECK-LABEL: define void @test_vst4_lane_s16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_lane_s16( // CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0 @@ -21704,7 +19459,7 @@ void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) { vst4_lane_s16(a, b, 3); } -// CHECK-LABEL: define void @test_vst4_lane_s32(i32* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_lane_s32( // CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0 @@ -21740,7 +19495,7 @@ void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) { vst4_lane_s32(a, b, 1); } -// CHECK-LABEL: define void @test_vst4_lane_f16(half* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_lane_f16( // CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0 @@ -21776,7 +19531,7 @@ void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) { vst4_lane_f16(a, b, 3); } -// CHECK-LABEL: define void @test_vst4_lane_f32(float* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_lane_f32( // CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0 @@ -21812,7 +19567,7 @@ void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) { vst4_lane_f32(a, b, 1); } -// CHECK-LABEL: define void @test_vst4_lane_p8(i8* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_lane_p8( // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 @@ -21839,7 +19594,7 @@ void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) { vst4_lane_p8(a, b, 7); } -// CHECK-LABEL: define void @test_vst4_lane_p16(i16* %a, [4 x i64] %b.coerce) #0 { +// CHECK-LABEL: @test_vst4_lane_p16( // CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0 @@ -21875,140 +19630,136 @@ void test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) { vst4_lane_p16(a, b, 3); } - -// CHECK-LABEL: define <8 x i8> @test_vsub_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsub_s8( // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b // CHECK: ret <8 x i8> [[SUB_I]] int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) { return vsub_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vsub_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsub_s16( // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b // CHECK: ret <4 x i16> [[SUB_I]] int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) { return vsub_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vsub_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsub_s32( // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b // CHECK: ret <2 x i32> [[SUB_I]] int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) { return vsub_s32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vsub_s64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsub_s64( // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b // CHECK: ret <1 x i64> [[SUB_I]] int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) { return vsub_s64(a, b); } -// CHECK-LABEL: define <2 x float> @test_vsub_f32(<2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vsub_f32( // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %a, %b // CHECK: ret <2 x float> [[SUB_I]] float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) { return vsub_f32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vsub_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsub_u8( // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %a, %b // CHECK: ret <8 x i8> [[SUB_I]] uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) { return vsub_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vsub_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsub_u16( // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %a, %b // CHECK: ret <4 x i16> [[SUB_I]] uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) { return vsub_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vsub_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsub_u32( // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %a, %b // CHECK: ret <2 x i32> [[SUB_I]] uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) { return vsub_u32(a, b); } -// CHECK-LABEL: define <1 x i64> @test_vsub_u64(<1 x i64> %a, <1 x i64> %b) #0 { +// CHECK-LABEL: @test_vsub_u64( // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %a, %b // CHECK: ret <1 x i64> [[SUB_I]] uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) { return vsub_u64(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vsubq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsubq_s8( // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b // CHECK: ret <16 x i8> [[SUB_I]] int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) { return vsubq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vsubq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsubq_s16( // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b // CHECK: ret <8 x i16> [[SUB_I]] int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) { return vsubq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vsubq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsubq_s32( // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) { return vsubq_s32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vsubq_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsubq_s64( // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) { return vsubq_s64(a, b); } -// CHECK-LABEL: define <4 x float> @test_vsubq_f32(<4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vsubq_f32( // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %a, %b // CHECK: ret <4 x float> [[SUB_I]] float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) { return vsubq_f32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vsubq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vsubq_u8( // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %a, %b // CHECK: ret <16 x i8> [[SUB_I]] uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) { return vsubq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vsubq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsubq_u16( // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, %b // CHECK: ret <8 x i16> [[SUB_I]] uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) { return vsubq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vsubq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsubq_u32( // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, %b // CHECK: ret <4 x i32> [[SUB_I]] uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) { return vsubq_u32(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vsubq_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsubq_u64( // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, %b // CHECK: ret <2 x i64> [[SUB_I]] uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) { return vsubq_u64(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsubhn_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]] +// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VSUBHN2_I]] @@ -22016,12 +19767,10 @@ int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { return vsubhn_s16(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsubhn_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] +// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VSUBHN2_I]] @@ -22029,12 +19778,10 @@ int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { return vsubhn_s32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsubhn_s64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]] +// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VSUBHN2_I]] @@ -22042,12 +19789,10 @@ int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { return vsubhn_s64(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vsubhn_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> [[TMP2]], [[TMP3]] +// CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> // CHECK: ret <8 x i8> [[VSUBHN2_I]] @@ -22055,12 +19800,10 @@ uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { return vsubhn_u16(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vsubhn_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP3]] +// CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> // CHECK: ret <4 x i16> [[VSUBHN2_I]] @@ -22068,12 +19811,10 @@ uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { return vsubhn_u32(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) #0 { +// CHECK-LABEL: @test_vsubhn_u64( // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> -// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP3]] +// CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> // CHECK: ret <2 x i32> [[VSUBHN2_I]] @@ -22081,8 +19822,7 @@ uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { return vsubhn_u64(a, b); } - -// CHECK-LABEL: define <8 x i16> @test_vsubl_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsubl_s8( // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] @@ -22091,33 +19831,29 @@ int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) { return vsubl_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vsubl_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsubl_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32> // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { return vsubl_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vsubl_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsubl_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64> +// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64> // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) { return vsubl_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vsubl_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsubl_u8( // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16> // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16> // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] @@ -22126,34 +19862,29 @@ uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) { return vsubl_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vsubl_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsubl_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32> // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <4 x i32> [[SUB_I]] uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) { return vsubl_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vsubl_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsubl_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64> +// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> +// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64> // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] // CHECK: ret <2 x i64> [[SUB_I]] uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) { return vsubl_u32(a, b); } - -// CHECK-LABEL: define <8 x i16> @test_vsubw_s8(<8 x i16> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsubw_s8( // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]] // CHECK: ret <8 x i16> [[SUB_I]] @@ -22161,27 +19892,25 @@ int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) { return vsubw_s8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vsubw_s16(<4 x i32> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsubw_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32> // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) { return vsubw_s16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vsubw_s32(<2 x i64> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsubw_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64> // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) { return vsubw_s32(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vsubw_u8(<8 x i16> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vsubw_u8( // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]] // CHECK: ret <8 x i16> [[SUB_I]] @@ -22189,50 +19918,46 @@ uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) { return vsubw_u8(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vsubw_u16(<4 x i32> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vsubw_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32> // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]] // CHECK: ret <4 x i32> [[SUB_I]] uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) { return vsubw_u16(a, b); } -// CHECK-LABEL: define <2 x i64> @test_vsubw_u32(<2 x i64> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vsubw_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64> // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]] // CHECK: ret <2 x i64> [[SUB_I]] uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { return vsubw_u32(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl1_u8( // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VTBL1_I]] uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) { return vtbl1_u8(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl1_s8( // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VTBL1_I]] int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) { return vtbl1_s8(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl1_p8( // CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b) #4 // CHECK: ret <8 x i8> [[VTBL1_I]] poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) { return vtbl1_p8(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vtbl2_u8([2 x i64] %a.coerce, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl2_u8( // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0 @@ -22256,7 +19981,7 @@ uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) { return vtbl2_u8(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtbl2_s8([2 x i64] %a.coerce, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl2_s8( // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[A:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0 @@ -22280,7 +20005,7 @@ int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) { return vtbl2_s8(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtbl2_p8([2 x i64] %a.coerce, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl2_p8( // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0 @@ -22304,8 +20029,7 @@ poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) { return vtbl2_p8(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vtbl3_u8([3 x i64] %a.coerce, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl3_u8( // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0 @@ -22332,7 +20056,7 @@ uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) { return vtbl3_u8(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtbl3_s8([3 x i64] %a.coerce, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl3_s8( // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[A:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0 @@ -22359,7 +20083,7 @@ int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) { return vtbl3_s8(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtbl3_p8([3 x i64] %a.coerce, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl3_p8( // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0 @@ -22386,8 +20110,7 @@ poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) { return vtbl3_p8(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vtbl4_u8([4 x i64] %a.coerce, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl4_u8( // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0 @@ -22417,7 +20140,7 @@ uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) { return vtbl4_u8(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtbl4_s8([4 x i64] %a.coerce, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl4_s8( // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[A:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0 @@ -22447,7 +20170,7 @@ int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) { return vtbl4_s8(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtbl4_p8([4 x i64] %a.coerce, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtbl4_p8( // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0 @@ -22477,30 +20200,28 @@ poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) { return vtbl4_p8(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx1_u8( // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 // CHECK: ret <8 x i8> [[VTBX1_I]] uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) { return vtbx1_u8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx1_s8( // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 // CHECK: ret <8 x i8> [[VTBX1_I]] int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) { return vtbx1_s8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx1_p8( // CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #4 // CHECK: ret <8 x i8> [[VTBX1_I]] poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) { return vtbx1_p8(a, b, c); } - -// CHECK-LABEL: define <8 x i8> @test_vtbx2_u8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx2_u8( // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0 @@ -22524,7 +20245,7 @@ uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) { return vtbx2_u8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vtbx2_s8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx2_s8( // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0 @@ -22548,7 +20269,7 @@ int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) { return vtbx2_s8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vtbx2_p8(<8 x i8> %a, [2 x i64] %b.coerce, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx2_p8( // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0 @@ -22572,8 +20293,7 @@ poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) { return vtbx2_p8(a, b, c); } - -// CHECK-LABEL: define <8 x i8> @test_vtbx3_u8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx3_u8( // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0 @@ -22600,7 +20320,7 @@ uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) { return vtbx3_u8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vtbx3_s8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx3_s8( // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0 @@ -22627,7 +20347,7 @@ int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) { return vtbx3_s8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vtbx3_p8(<8 x i8> %a, [3 x i64] %b.coerce, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx3_p8( // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0 @@ -22654,8 +20374,7 @@ poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) { return vtbx3_p8(a, b, c); } - -// CHECK-LABEL: define <8 x i8> @test_vtbx4_u8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx4_u8( // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0 @@ -22685,7 +20404,7 @@ uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) { return vtbx4_u8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vtbx4_s8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx4_s8( // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0 @@ -22715,7 +20434,7 @@ int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) { return vtbx4_s8(a, b, c); } -// CHECK-LABEL: define <8 x i8> @test_vtbx4_p8(<8 x i8> %a, [4 x i64] %b.coerce, <8 x i8> %c) #0 { +// CHECK-LABEL: @test_vtbx4_p8( // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0 @@ -22745,16 +20464,15 @@ poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) { return vtbx4_p8(a, b, c); } - -// CHECK-LABEL: define void @test_vtrn_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn_s8( // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] +// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], !noalias !3 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] +// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !3 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 @@ -22763,57 +20481,53 @@ int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { return vtrn_s8(a, b); } -// CHECK-LABEL: define void @test_vtrn_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn_s16( // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], !noalias !6 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !6 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { return vtrn_s16(a, b); } -// CHECK-LABEL: define void @test_vtrn_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vtrn_s32( // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], !noalias !9 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !noalias !9 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { return vtrn_s32(a, b); } -// CHECK-LABEL: define void @test_vtrn_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn_u8( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] +// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], !noalias !12 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] +// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !12 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 @@ -22822,78 +20536,72 @@ uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { return vtrn_u8(a, b); } -// CHECK-LABEL: define void @test_vtrn_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn_u16( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], !noalias !15 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !15 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { return vtrn_u16(a, b); } -// CHECK-LABEL: define void @test_vtrn_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vtrn_u32( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], !noalias !18 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], !noalias !18 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { return vtrn_u32(a, b); } -// CHECK-LABEL: define void @test_vtrn_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vtrn_f32( // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> -// CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> -// CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK: store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]], !noalias !21 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK: store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]], !noalias !21 +// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { return vtrn_f32(a, b); } -// CHECK-LABEL: define void @test_vtrn_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrn_p8( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]] +// CHECK: store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], !noalias !24 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]] +// CHECK: store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], !noalias !24 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 @@ -22902,36 +20610,34 @@ poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { return vtrn_p8(a, b); } -// CHECK-LABEL: define void @test_vtrn_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrn_p16( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], !noalias !27 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], !noalias !27 +// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { return vtrn_p16(a, b); } -// CHECK-LABEL: define void @test_vtrnq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrnq_s8( // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] +// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], !noalias !30 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] +// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !30 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 @@ -22940,57 +20646,53 @@ int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { return vtrnq_s8(a, b); } -// CHECK-LABEL: define void @test_vtrnq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrnq_s16( // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], !noalias !33 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !33 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { return vtrnq_s16(a, b); } -// CHECK-LABEL: define void @test_vtrnq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vtrnq_s32( // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], !noalias !36 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !36 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { return vtrnq_s32(a, b); } -// CHECK-LABEL: define void @test_vtrnq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrnq_u8( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] +// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], !noalias !39 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] +// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !39 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 @@ -22999,78 +20701,72 @@ uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { return vtrnq_u8(a, b); } -// CHECK-LABEL: define void @test_vtrnq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrnq_u16( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], !noalias !42 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !42 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { return vtrnq_u16(a, b); } -// CHECK-LABEL: define void @test_vtrnq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vtrnq_u32( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], !noalias !45 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], !noalias !45 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { return vtrnq_u32(a, b); } -// CHECK-LABEL: define void @test_vtrnq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vtrnq_f32( // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> -// CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> -// CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +// CHECK: store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]], !noalias !48 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +// CHECK: store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], !noalias !48 +// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { return vtrnq_f32(a, b); } -// CHECK-LABEL: define void @test_vtrnq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtrnq_p8( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]] +// CHECK: store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], !noalias !51 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]] +// CHECK: store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], !noalias !51 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 @@ -23079,29 +20775,26 @@ poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { return vtrnq_p8(a, b); } -// CHECK-LABEL: define void @test_vtrnq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtrnq_p16( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], !noalias !54 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], !noalias !54 +// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { return vtrnq_p16(a, b); } - -// CHECK-LABEL: define <8 x i8> @test_vtst_s8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtst_s8( // CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> @@ -23110,33 +20803,29 @@ uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) { return vtst_s8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vtst_s16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtst_s16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16> +// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> // CHECK: ret <4 x i16> [[VTST_I]] uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) { return vtst_s16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vtst_s32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vtst_s32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32> +// CHECK: [[TMP2:%.*]] = and <2 x i32> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> // CHECK: ret <2 x i32> [[VTST_I]] uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) { return vtst_s32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtst_u8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtst_u8( // CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> @@ -23145,33 +20834,29 @@ uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) { return vtst_u8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vtst_u16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtst_u16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16> +// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> // CHECK: ret <4 x i16> [[VTST_I]] uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) { return vtst_u16(a, b); } -// CHECK-LABEL: define <2 x i32> @test_vtst_u32(<2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vtst_u32( // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP4:%.*]] = and <2 x i32> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <2 x i32> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i32> +// CHECK: [[TMP2:%.*]] = and <2 x i32> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> // CHECK: ret <2 x i32> [[VTST_I]] uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) { return vtst_u32(a, b); } -// CHECK-LABEL: define <8 x i8> @test_vtst_p8(<8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vtst_p8( // CHECK: [[TMP0:%.*]] = and <8 x i8> %a, %b // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> @@ -23180,20 +20865,18 @@ uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) { return vtst_p8(a, b); } -// CHECK-LABEL: define <4 x i16> @test_vtst_p16(<4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vtst_p16( // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP4:%.*]] = and <4 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <4 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i16> +// CHECK: [[TMP2:%.*]] = and <4 x i16> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> // CHECK: ret <4 x i16> [[VTST_I]] uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) { return vtst_p16(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vtstq_s8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtstq_s8( // CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> @@ -23202,33 +20885,29 @@ uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) { return vtstq_s8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vtstq_s16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtstq_s16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> +// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> // CHECK: ret <8 x i16> [[VTST_I]] uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) { return vtstq_s16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vtstq_s32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vtstq_s32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +// CHECK: [[TMP2:%.*]] = and <4 x i32> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> // CHECK: ret <4 x i32> [[VTST_I]] uint32x4_t test_vtstq_s32(int32x4_t a, int32x4_t b) { return vtstq_s32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vtstq_u8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtstq_u8( // CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> @@ -23237,33 +20916,29 @@ uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) { return vtstq_u8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vtstq_u16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtstq_u16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> +// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> // CHECK: ret <8 x i16> [[VTST_I]] uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) { return vtstq_u16(a, b); } -// CHECK-LABEL: define <4 x i32> @test_vtstq_u32(<4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vtstq_u32( // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> +// CHECK: [[TMP2:%.*]] = and <4 x i32> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> // CHECK: ret <4 x i32> [[VTST_I]] uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) { return vtstq_u32(a, b); } -// CHECK-LABEL: define <16 x i8> @test_vtstq_p8(<16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vtstq_p8( // CHECK: [[TMP0:%.*]] = and <16 x i8> %a, %b // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> @@ -23272,29 +20947,26 @@ uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) { return vtstq_p8(a, b); } -// CHECK-LABEL: define <8 x i16> @test_vtstq_p16(<8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vtstq_p16( // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP4:%.*]] = and <8 x i16> [[TMP2]], [[TMP3]] -// CHECK: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer -// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> +// CHECK: [[TMP2:%.*]] = and <8 x i16> %a, %b +// CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer +// CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> // CHECK: ret <8 x i16> [[VTST_I]] uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) { return vtstq_p16(a, b); } - -// CHECK-LABEL: define void @test_vuzp_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vuzp_s8( // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]] +// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], !noalias !57 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]] +// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !57 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 @@ -23303,57 +20975,53 @@ int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) { return vuzp_s8(a, b); } -// CHECK-LABEL: define void @test_vuzp_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vuzp_s16( // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], !noalias !60 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !60 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) { return vuzp_s16(a, b); } -// CHECK-LABEL: define void @test_vuzp_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vuzp_s32( // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], !noalias !63 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !noalias !63 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) { return vuzp_s32(a, b); } -// CHECK-LABEL: define void @test_vuzp_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vuzp_u8( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]] +// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], !noalias !66 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]] +// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !66 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 @@ -23362,78 +21030,72 @@ uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) { return vuzp_u8(a, b); } -// CHECK-LABEL: define void @test_vuzp_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vuzp_u16( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], !noalias !69 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !69 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) { return vuzp_u16(a, b); } -// CHECK-LABEL: define void @test_vuzp_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vuzp_u32( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], !noalias !72 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], !noalias !72 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) { return vuzp_u32(a, b); } -// CHECK-LABEL: define void @test_vuzp_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vuzp_f32( // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> -// CHECK: store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> -// CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK: store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]], !noalias !75 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK: store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]], !noalias !75 +// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) { return vuzp_f32(a, b); } -// CHECK-LABEL: define void @test_vuzp_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vuzp_p8( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]] +// CHECK: store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], !noalias !78 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]] +// CHECK: store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], !noalias !78 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 @@ -23442,36 +21104,34 @@ poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) { return vuzp_p8(a, b); } -// CHECK-LABEL: define void @test_vuzp_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vuzp_p16( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], !noalias !81 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], !noalias !81 +// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) { return vuzp_p16(a, b); } -// CHECK-LABEL: define void @test_vuzpq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vuzpq_s8( // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]] +// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], !noalias !84 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]] +// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !84 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 @@ -23480,57 +21140,53 @@ int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) { return vuzpq_s8(a, b); } -// CHECK-LABEL: define void @test_vuzpq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vuzpq_s16( // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], !noalias !87 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !87 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) { return vuzpq_s16(a, b); } -// CHECK-LABEL: define void @test_vuzpq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vuzpq_s32( // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], !noalias !90 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !90 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) { return vuzpq_s32(a, b); } -// CHECK-LABEL: define void @test_vuzpq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vuzpq_u8( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]] +// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], !noalias !93 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]] +// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !93 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 @@ -23539,78 +21195,72 @@ uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) { return vuzpq_u8(a, b); } -// CHECK-LABEL: define void @test_vuzpq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vuzpq_u16( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], !noalias !96 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !96 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) { return vuzpq_u16(a, b); } -// CHECK-LABEL: define void @test_vuzpq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vuzpq_u32( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], !noalias !99 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], !noalias !99 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) { return vuzpq_u32(a, b); } -// CHECK-LABEL: define void @test_vuzpq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vuzpq_f32( // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> -// CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> -// CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +// CHECK: store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]], !noalias !102 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +// CHECK: store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], !noalias !102 +// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) { return vuzpq_f32(a, b); } -// CHECK-LABEL: define void @test_vuzpq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vuzpq_p8( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]] +// CHECK: store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], !noalias !105 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]] +// CHECK: store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], !noalias !105 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 @@ -23619,37 +21269,34 @@ poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) { return vuzpq_p8(a, b); } -// CHECK-LABEL: define void @test_vuzpq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vuzpq_p16( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], !noalias !108 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], !noalias !108 +// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) { return vuzpq_p16(a, b); } - -// CHECK-LABEL: define void @test_vzip_s8(%struct.int8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vzip_s8( // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]] +// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], !noalias !111 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] +// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !111 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x8x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 @@ -23658,57 +21305,53 @@ int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) { return vzip_s8(a, b); } -// CHECK-LABEL: define void @test_vzip_s16(%struct.int16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vzip_s16( // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], !noalias !114 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !114 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) { return vzip_s16(a, b); } -// CHECK-LABEL: define void @test_vzip_s32(%struct.int32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vzip_s32( // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], !noalias !117 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !noalias !117 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x2x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) { return vzip_s32(a, b); } -// CHECK-LABEL: define void @test_vzip_u8(%struct.uint8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vzip_u8( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]] +// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], !noalias !120 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] +// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !120 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 @@ -23717,78 +21360,72 @@ uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) { return vzip_u8(a, b); } -// CHECK-LABEL: define void @test_vzip_u16(%struct.uint16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vzip_u16( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], !noalias !123 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !123 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) { return vzip_u16(a, b); } -// CHECK-LABEL: define void @test_vzip_u32(%struct.uint32x2x2_t* noalias sret %agg.result, <2 x i32> %a, <2 x i32> %b) #0 { +// CHECK-LABEL: @test_vzip_u32( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> -// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], <2 x i32> -// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], !noalias !126 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> +// CHECK: store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], !noalias !126 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x2x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) { return vzip_u32(a, b); } -// CHECK-LABEL: define void @test_vzip_f32(%struct.float32x2x2_t* noalias sret %agg.result, <2 x float> %a, <2 x float> %b) #0 { +// CHECK-LABEL: @test_vzip_f32( // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x2x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> -// CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> -// CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK: store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]], !noalias !129 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> +// CHECK: store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]], !noalias !129 +// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x2x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x2x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) { return vzip_f32(a, b); } -// CHECK-LABEL: define void @test_vzip_p8(%struct.poly8x8x2_t* noalias sret %agg.result, <8 x i8> %a, <8 x i8> %b) #0 { +// CHECK-LABEL: @test_vzip_p8( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x8x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]] +// CHECK: store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], !noalias !132 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> -// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]] +// CHECK: store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], !noalias !132 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 16, i32 8, i1 false) #4 @@ -23797,36 +21434,34 @@ poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) { return vzip_p8(a, b); } -// CHECK-LABEL: define void @test_vzip_p16(%struct.poly16x4x2_t* noalias sret %agg.result, <4 x i16> %a, <4 x i16> %b) #0 { +// CHECK-LABEL: @test_vzip_p16( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x4x2_t, align 8 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> -// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 16, i32 8, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], !noalias !135 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> +// CHECK: store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], !noalias !135 +// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 16, i32 8, i1 false) #4 // CHECK: ret void poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) { return vzip_p16(a, b); } -// CHECK-LABEL: define void @test_vzipq_s8(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vzipq_s8( // CHECK: [[__RET_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]] +// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], !noalias !138 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] +// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !138 // CHECK: [[TMP3:%.*]] = bitcast %struct.int8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.int8x16x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 @@ -23835,57 +21470,53 @@ int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) { return vzipq_s8(a, b); } -// CHECK-LABEL: define void @test_vzipq_s16(%struct.int16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vzipq_s16( // CHECK: [[__RET_I:%.*]] = alloca %struct.int16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], !noalias !141 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !141 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int16x8x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) { return vzipq_s16(a, b); } -// CHECK-LABEL: define void @test_vzipq_s32(%struct.int32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vzipq_s32( // CHECK: [[__RET_I:%.*]] = alloca %struct.int32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], !noalias !144 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !144 +// CHECK: [[TMP5:%.*]] = bitcast %struct.int32x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.int32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) { return vzipq_s32(a, b); } -// CHECK-LABEL: define void @test_vzipq_u8(%struct.uint8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vzipq_u8( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]] +// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], !noalias !147 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] +// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !147 // CHECK: [[TMP3:%.*]] = bitcast %struct.uint8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 @@ -23894,78 +21525,72 @@ uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) { return vzipq_u8(a, b); } -// CHECK-LABEL: define void @test_vzipq_u16(%struct.uint16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vzipq_u16( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], !noalias !150 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !150 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint16x8x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) { return vzipq_u16(a, b); } -// CHECK-LABEL: define void @test_vzipq_u32(%struct.uint32x4x2_t* noalias sret %agg.result, <4 x i32> %a, <4 x i32> %b) #0 { +// CHECK-LABEL: @test_vzipq_u32( // CHECK: [[__RET_I:%.*]] = alloca %struct.uint32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], !noalias !153 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +// CHECK: store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], !noalias !153 +// CHECK: [[TMP5:%.*]] = bitcast %struct.uint32x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) { return vzipq_u32(a, b); } -// CHECK-LABEL: define void @test_vzipq_f32(%struct.float32x4x2_t* noalias sret %agg.result, <4 x float> %a, <4 x float> %b) #0 { +// CHECK-LABEL: @test_vzipq_f32( // CHECK: [[__RET_I:%.*]] = alloca %struct.float32x4x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> -// CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> -// CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +// CHECK: store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]], !noalias !156 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +// CHECK: store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], !noalias !156 +// CHECK: [[TMP5:%.*]] = bitcast %struct.float32x4x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.float32x4x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) { return vzipq_f32(a, b); } -// CHECK-LABEL: define void @test_vzipq_p8(%struct.poly8x16x2_t* noalias sret %agg.result, <16 x i8> %a, <16 x i8> %b) #0 { +// CHECK-LABEL: @test_vzipq_p8( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>* // CHECK: [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]] +// CHECK: store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], !noalias !159 // CHECK: [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1 // CHECK: [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]] +// CHECK: store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], !noalias !159 // CHECK: [[TMP3:%.*]] = bitcast %struct.poly8x16x2_t* %agg.result to i8* // CHECK: [[TMP4:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET_I]] to i8* // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP3]], i8* [[TMP4]], i32 32, i32 16, i1 false) #4 @@ -23974,25 +21599,21 @@ poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) { return vzipq_p8(a, b); } -// CHECK-LABEL: define void @test_vzipq_p16(%struct.poly16x8x2_t* noalias sret %agg.result, <8 x i16> %a, <8 x i16> %b) #0 { +// CHECK-LABEL: @test_vzipq_p16( // CHECK: [[__RET_I:%.*]] = alloca %struct.poly16x8x2_t, align 16 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]] -// CHECK: [[TMP6:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 -// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> -// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP6]] -// CHECK: [[TMP7:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* -// CHECK: [[TMP8:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP7]], i8* [[TMP8]], i32 32, i32 16, i1 false) #4 +// CHECK: [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], !noalias !162 +// CHECK: [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1 +// CHECK: [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +// CHECK: store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], !noalias !162 +// CHECK: [[TMP5:%.*]] = bitcast %struct.poly16x8x2_t* %agg.result to i8* +// CHECK: [[TMP6:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET_I]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[TMP5]], i8* [[TMP6]], i32 32, i32 16, i1 false) #4 // CHECK: ret void poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) { return vzipq_p16(a, b); } - -