From: Bill Wendling
Date: Sun, 1 Dec 2013 04:32:33 +0000 (+0000)
Subject: Merging r195844:
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=51d85463d5debc70d034c257c60b245591495367;p=clang

Merging r195844:
------------------------------------------------------------------------
r195844 | jiangning | 2013-11-27 06:02:55 -0800 (Wed, 27 Nov 2013) | 2 lines

Fix the AArch64 NEON bug exposed by checking constant integer argument range
of ACLE intrinsics.
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_34@196012 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/test/CodeGen/aarch64-neon-2velem.c b/test/CodeGen/aarch64-neon-2velem.c
index 2a1eae4f16..36166a4db4 100644
--- a/test/CodeGen/aarch64-neon-2velem.c
+++ b/test/CodeGen/aarch64-neon-2velem.c
@@ -10,14 +10,14 @@
 int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
   // CHECK: test_vmla_lane_s16
-  return vmla_lane_s16(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmla_lane_s16(a, b, v, 3);
+  // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
   // CHECK: test_vmlaq_lane_s16
-  return vmlaq_lane_s16(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlaq_lane_s16(a, b, v, 3);
+  // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t v) {
@@ -34,38 +34,38 @@ int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) {
 int16x4_t test_vmla_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
   // CHECK: test_vmla_laneq_s16
-  return vmla_laneq_s16(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmla_laneq_s16(a, b, v, 7);
+  // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
 }
 int16x8_t test_vmlaq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
   // CHECK: test_vmlaq_laneq_s16
-  return vmlaq_laneq_s16(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlaq_laneq_s16(a, b, v, 7);
+  // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
 }
 int32x2_t test_vmla_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
   // CHECK: test_vmla_laneq_s32
-  return vmla_laneq_s32(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vmla_laneq_s32(a, b, v, 3);
+  // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
 }
 int32x4_t test_vmlaq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
   // CHECK: test_vmlaq_laneq_s32
-  return vmlaq_laneq_s32(a, b, v, 1);
-  // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vmlaq_laneq_s32(a, b, v, 3);
+  // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
 }
 int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t v) {
   // CHECK: test_vmls_lane_s16
-  return vmls_lane_s16(a, b, v, 1);
-  // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1]
+  return vmls_lane_s16(a, b, v, 3);
+  // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
 }
 int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t v) {
   // CHECK: test_vmlsq_lane_s16
-  return vmlsq_lane_s16(a, b, v, 1);
-  // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1]
+  return vmlsq_lane_s16(a, b, v, 3);
+  // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
 }
 int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t
b, int32x2_t v) { @@ -82,38 +82,38 @@ int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t v) { int16x4_t test_vmls_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { // CHECK: test_vmls_laneq_s16 - return vmls_laneq_s16(a, b, v, 1); - // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmls_laneq_s16(a, b, v, 7); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] } int16x8_t test_vmlsq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { // CHECK: test_vmlsq_laneq_s16 - return vmlsq_laneq_s16(a, b, v, 1); - // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmlsq_laneq_s16(a, b, v, 7); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] } int32x2_t test_vmls_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { // CHECK: test_vmls_laneq_s32 - return vmls_laneq_s32(a, b, v, 1); - // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vmls_laneq_s32(a, b, v, 3); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } int32x4_t test_vmlsq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { // CHECK: test_vmlsq_laneq_s32 - return vmlsq_laneq_s32(a, b, v, 1); - // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vmlsq_laneq_s32(a, b, v, 3); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t v) { // CHECK: test_vmul_lane_s16 - return vmul_lane_s16(a, v, 1); - // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmul_lane_s16(a, v, 3); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t v) { // CHECK: test_vmulq_lane_s16 - return vmulq_lane_s16(a, v, 1); - // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmulq_lane_s16(a, v, 3); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t v) { @@ -130,14 +130,14 @@ int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t v) { uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t v) { // CHECK: test_vmul_lane_u16 - return vmul_lane_u16(a, v, 1); - // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmul_lane_u16(a, v, 3); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t v) { // CHECK: test_vmulq_lane_u16 - return vmulq_lane_u16(a, v, 1); - // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmulq_lane_u16(a, v, 3); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t v) { @@ -154,50 +154,50 @@ uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t v) { int16x4_t test_vmul_laneq_s16(int16x4_t a, int16x8_t v) { // CHECK: test_vmul_laneq_s16 - return vmul_laneq_s16(a, v, 1); - // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmul_laneq_s16(a, v, 7); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] } int16x8_t test_vmulq_laneq_s16(int16x8_t a, int16x8_t v) { // CHECK: test_vmulq_laneq_s16 - return vmulq_laneq_s16(a, v, 1); - // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmulq_laneq_s16(a, v, 7); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] } int32x2_t test_vmul_laneq_s32(int32x2_t a, int32x4_t v) { // CHECK: test_vmul_laneq_s32 - return vmul_laneq_s32(a, v, 1); - // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return 
vmul_laneq_s32(a, v, 3); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } int32x4_t test_vmulq_laneq_s32(int32x4_t a, int32x4_t v) { // CHECK: test_vmulq_laneq_s32 - return vmulq_laneq_s32(a, v, 1); - // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vmulq_laneq_s32(a, v, 3); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } uint16x4_t test_vmul_laneq_u16(uint16x4_t a, uint16x8_t v) { // CHECK: test_vmul_laneq_u16 - return vmul_laneq_u16(a, v, 1); - // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmul_laneq_u16(a, v, 7); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] } uint16x8_t test_vmulq_laneq_u16(uint16x8_t a, uint16x8_t v) { // CHECK: test_vmulq_laneq_u16 - return vmulq_laneq_u16(a, v, 1); - // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmulq_laneq_u16(a, v, 7); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] } uint32x2_t test_vmul_laneq_u32(uint32x2_t a, uint32x4_t v) { // CHECK: test_vmul_laneq_u32 - return vmul_laneq_u32(a, v, 1); - // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vmul_laneq_u32(a, v, 3); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } uint32x4_t test_vmulq_laneq_u32(uint32x4_t a, uint32x4_t v) { // CHECK: test_vmulq_laneq_u32 - return vmulq_laneq_u32(a, v, 1); - // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vmulq_laneq_u32(a, v, 3); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } float32x2_t test_vfma_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { @@ -214,14 +214,14 @@ float32x4_t test_vfmaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { float32x2_t test_vfma_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { // CHECK: test_vfma_laneq_f32 - return vfma_laneq_f32(a, b, v, 1); - // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vfma_laneq_f32(a, b, v, 3); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { // CHECK: test_vfmaq_laneq_f32 - return vfmaq_laneq_f32(a, b, v, 1); - // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vfmaq_laneq_f32(a, b, v, 3); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { @@ -238,14 +238,14 @@ float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { // CHECK: test_vfms_laneq_f32 - return vfms_laneq_f32(a, b, v, 1); - // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vfms_laneq_f32(a, b, v, 3); + // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } float32x4_t test_vfmsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { // CHECK: test_vfmsq_laneq_f32 - return vfmsq_laneq_f32(a, b, v, 1); - // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vfmsq_laneq_f32(a, b, v, 3); + // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { @@ -254,12 +254,6 @@ float64x2_t test_vfmaq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] } -float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { - // CHECK: test_vfmaq_laneq_f64 - 
return vfmaq_laneq_f64(a, b, v, 0); - // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -} - float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { // CHECK: test_vfmaq_laneq_f64 return vfmaq_laneq_f64(a, b, v, 1); @@ -272,12 +266,6 @@ float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] } -float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { - // CHECK: test_vfmsq_laneq_f64 - return vfmsq_laneq_f64(a, b, v, 0); - // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -} - float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { // CHECK: test_vfmsq_laneq_f64 return vfmsq_laneq_f64(a, b, v, 1); @@ -286,8 +274,8 @@ float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: test_vmlal_lane_s16 - return vmlal_lane_s16(a, b, v, 1); - // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmlal_lane_s16(a, b, v, 3); + // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -298,20 +286,20 @@ int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { int32x4_t test_vmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: test_vmlal_laneq_s16 - return vmlal_laneq_s16(a, b, v, 1); - // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmlal_laneq_s16(a, b, v, 7); + // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] } int64x2_t test_vmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: test_vmlal_laneq_s32 - return vmlal_laneq_s32(a, b, v, 1); - // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vmlal_laneq_s32(a, b, v, 3); + // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } int32x4_t test_vmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: test_vmlal_high_lane_s16 - return vmlal_high_lane_s16(a, b, v, 1); - // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmlal_high_lane_s16(a, b, v, 3); + // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -322,20 +310,20 @@ int64x2_t test_vmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { int32x4_t test_vmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: test_vmlal_high_laneq_s16 - return vmlal_high_laneq_s16(a, b, v, 1); - // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmlal_high_laneq_s16(a, b, v, 7); + // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] } int64x2_t test_vmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: test_vmlal_high_laneq_s32 - return vmlal_high_laneq_s32(a, b, v, 1); - // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vmlal_high_laneq_s32(a, b, v, 3); + // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: test_vmlsl_lane_s16 - return vmlsl_lane_s16(a, b, v, 1); - // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmlsl_lane_s16(a, b, v, 3); + // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t 
v) { @@ -346,20 +334,20 @@ int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { int32x4_t test_vmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: test_vmlsl_laneq_s16 - return vmlsl_laneq_s16(a, b, v, 1); - // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmlsl_laneq_s16(a, b, v, 7); + // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] } int64x2_t test_vmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: test_vmlsl_laneq_s32 - return vmlsl_laneq_s32(a, b, v, 1); - // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vmlsl_laneq_s32(a, b, v, 3); + // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } int32x4_t test_vmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: test_vmlsl_high_lane_s16 - return vmlsl_high_lane_s16(a, b, v, 1); - // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmlsl_high_lane_s16(a, b, v, 3); + // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -370,20 +358,20 @@ int64x2_t test_vmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { int32x4_t test_vmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: test_vmlsl_high_laneq_s16 - return vmlsl_high_laneq_s16(a, b, v, 1); - // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmlsl_high_laneq_s16(a, b, v, 7); + // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] } int64x2_t test_vmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: test_vmlsl_high_laneq_s32 - return vmlsl_high_laneq_s32(a, b, v, 1); - // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vmlsl_high_laneq_s32(a, b, v, 3); + // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } int32x4_t test_vmlal_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: test_vmlal_lane_u16 - return vmlal_lane_u16(a, b, v, 1); - // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmlal_lane_u16(a, b, v, 3); + // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -394,20 +382,20 @@ int64x2_t test_vmlal_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { int32x4_t test_vmlal_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: test_vmlal_laneq_u16 - return vmlal_laneq_u16(a, b, v, 1); - // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmlal_laneq_u16(a, b, v, 7); + // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] } int64x2_t test_vmlal_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: test_vmlal_laneq_u32 - return vmlal_laneq_u32(a, b, v, 1); - // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vmlal_laneq_u32(a, b, v, 3); + // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } int32x4_t test_vmlal_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: test_vmlal_high_lane_u16 - return vmlal_high_lane_u16(a, b, v, 1); - // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmlal_high_lane_u16(a, b, v, 3); + // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -418,20 +406,20 @@ int64x2_t test_vmlal_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { int32x4_t 
test_vmlal_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: test_vmlal_high_laneq_u16 - return vmlal_high_laneq_u16(a, b, v, 1); - // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmlal_high_laneq_u16(a, b, v, 7); + // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] } int64x2_t test_vmlal_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: test_vmlal_high_laneq_u32 - return vmlal_high_laneq_u32(a, b, v, 1); - // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vmlal_high_laneq_u32(a, b, v, 3); + // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } int32x4_t test_vmlsl_lane_u16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: test_vmlsl_lane_u16 - return vmlsl_lane_u16(a, b, v, 1); - // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmlsl_lane_u16(a, b, v, 3); + // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -442,20 +430,20 @@ int64x2_t test_vmlsl_lane_u32(int64x2_t a, int32x2_t b, int32x2_t v) { int32x4_t test_vmlsl_laneq_u16(int32x4_t a, int16x4_t b, int16x8_t v) { // CHECK: test_vmlsl_laneq_u16 - return vmlsl_laneq_u16(a, b, v, 1); - // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmlsl_laneq_u16(a, b, v, 7); + // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] } int64x2_t test_vmlsl_laneq_u32(int64x2_t a, int32x2_t b, int32x4_t v) { // CHECK: test_vmlsl_laneq_u32 - return vmlsl_laneq_u32(a, b, v, 1); - // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vmlsl_laneq_u32(a, b, v, 3); + // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } int32x4_t test_vmlsl_high_lane_u16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: test_vmlsl_high_lane_u16 - return vmlsl_high_lane_u16(a, b, v, 1); - // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmlsl_high_lane_u16(a, b, v, 3); + // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -466,20 +454,20 @@ int64x2_t test_vmlsl_high_lane_u32(int64x2_t a, int32x4_t b, int32x2_t v) { int32x4_t test_vmlsl_high_laneq_u16(int32x4_t a, int16x8_t b, int16x8_t v) { // CHECK: test_vmlsl_high_laneq_u16 - return vmlsl_high_laneq_u16(a, b, v, 1); - // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmlsl_high_laneq_u16(a, b, v, 7); + // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] } int64x2_t test_vmlsl_high_laneq_u32(int64x2_t a, int32x4_t b, int32x4_t v) { // CHECK: test_vmlsl_high_laneq_u32 - return vmlsl_high_laneq_u32(a, b, v, 1); - // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vmlsl_high_laneq_u32(a, b, v, 3); + // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t v) { // CHECK: test_vmull_lane_s16 - return vmull_lane_s16(a, v, 1); - // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmull_lane_s16(a, v, 3); + // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) { @@ -490,8 +478,8 @@ int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t v) { uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t v) { // CHECK: test_vmull_lane_u16 - return vmull_lane_u16(a, v, 1); - // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, 
{{v[0-9]+}}.h[1] + return vmull_lane_u16(a, v, 3); + // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) { @@ -502,8 +490,8 @@ uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t v) { int32x4_t test_vmull_high_lane_s16(int16x8_t a, int16x4_t v) { // CHECK: test_vmull_high_lane_s16 - return vmull_high_lane_s16(a, v, 1); - // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmull_high_lane_s16(a, v, 3); + // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) { @@ -514,8 +502,8 @@ int64x2_t test_vmull_high_lane_s32(int32x4_t a, int32x2_t v) { uint32x4_t test_vmull_high_lane_u16(uint16x8_t a, uint16x4_t v) { // CHECK: test_vmull_high_lane_u16 - return vmull_high_lane_u16(a, v, 1); - // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmull_high_lane_u16(a, v, 3); + // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) { @@ -526,56 +514,56 @@ uint64x2_t test_vmull_high_lane_u32(uint32x4_t a, uint32x2_t v) { int32x4_t test_vmull_laneq_s16(int16x4_t a, int16x8_t v) { // CHECK: test_vmull_laneq_s16 - return vmull_laneq_s16(a, v, 1); - // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmull_laneq_s16(a, v, 7); + // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] } int64x2_t test_vmull_laneq_s32(int32x2_t a, int32x4_t v) { // CHECK: test_vmull_laneq_s32 - return vmull_laneq_s32(a, v, 1); - // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vmull_laneq_s32(a, v, 3); + // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } uint32x4_t test_vmull_laneq_u16(uint16x4_t a, uint16x8_t v) { // CHECK: test_vmull_laneq_u16 - return vmull_laneq_u16(a, v, 1); - // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vmull_laneq_u16(a, v, 7); + // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] } uint64x2_t test_vmull_laneq_u32(uint32x2_t a, uint32x4_t v) { // CHECK: test_vmull_laneq_u32 - return vmull_laneq_u32(a, v, 1); - // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vmull_laneq_u32(a, v, 3); + // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } int32x4_t test_vmull_high_laneq_s16(int16x8_t a, int16x8_t v) { // CHECK: test_vmull_high_laneq_s16 - return vmull_high_laneq_s16(a, v, 1); - // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmull_high_laneq_s16(a, v, 7); + // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] } int64x2_t test_vmull_high_laneq_s32(int32x4_t a, int32x4_t v) { // CHECK: test_vmull_high_laneq_s32 - return vmull_high_laneq_s32(a, v, 1); - // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vmull_high_laneq_s32(a, v, 3); + // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } uint32x4_t test_vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v) { // CHECK: test_vmull_high_laneq_u16 - return vmull_high_laneq_u16(a, v, 1); - // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vmull_high_laneq_u16(a, v, 7); + // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] } uint64x2_t test_vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v) { // CHECK: test_vmull_high_laneq_u32 - return vmull_high_laneq_u32(a, v, 1); - // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + 
return vmull_high_laneq_u32(a, v, 3); + // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: test_vqdmlal_lane_s16 - return vqdmlal_lane_s16(a, b, v, 1); - // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vqdmlal_lane_s16(a, b, v, 3); + // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -586,8 +574,8 @@ int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { int32x4_t test_vqdmlal_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: test_vqdmlal_high_lane_s16 - return vqdmlal_high_lane_s16(a, b, v, 1); - // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vqdmlal_high_lane_s16(a, b, v, 3); + // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -598,8 +586,8 @@ int64x2_t test_vqdmlal_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: test_vqdmlsl_lane_s16 - return vqdmlsl_lane_s16(a, b, v, 1); - // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vqdmlsl_lane_s16(a, b, v, 3); + // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { @@ -610,8 +598,8 @@ int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t v) { int32x4_t test_vqdmlsl_high_lane_s16(int32x4_t a, int16x8_t b, int16x4_t v) { // CHECK: test_vqdmlsl_high_lane_s16 - return vqdmlsl_high_lane_s16(a, b, v, 1); - // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vqdmlsl_high_lane_s16(a, b, v, 3); + // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { @@ -622,8 +610,8 @@ int64x2_t test_vqdmlsl_high_lane_s32(int64x2_t a, int32x4_t b, int32x2_t v) { int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t v) { // CHECK: test_vqdmull_lane_s16 - return vqdmull_lane_s16(a, v, 1); - // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vqdmull_lane_s16(a, v, 3); + // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) { @@ -634,20 +622,20 @@ int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t v) { int32x4_t test_vqdmull_laneq_s16(int16x4_t a, int16x8_t v) { // CHECK: test_vqdmull_laneq_s16 - return vqdmull_laneq_s16(a, v, 1); - // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vqdmull_laneq_s16(a, v, 3); + // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int64x2_t test_vqdmull_laneq_s32(int32x2_t a, int32x4_t v) { // CHECK: test_vqdmull_laneq_s32 - return vqdmull_laneq_s32(a, v, 1); - // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vqdmull_laneq_s32(a, v, 3); + // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } int32x4_t test_vqdmull_high_lane_s16(int16x8_t a, int16x4_t v) { // CHECK: test_vqdmull_high_lane_s16 - return vqdmull_high_lane_s16(a, v, 1); - // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vqdmull_high_lane_s16(a, v, 3); + // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int64x2_t 
test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) { @@ -658,26 +646,26 @@ int64x2_t test_vqdmull_high_lane_s32(int32x4_t a, int32x2_t v) { int32x4_t test_vqdmull_high_laneq_s16(int16x8_t a, int16x8_t v) { // CHECK: test_vqdmull_high_laneq_s16 - return vqdmull_high_laneq_s16(a, v, 1); - // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vqdmull_high_laneq_s16(a, v, 7); + // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] } int64x2_t test_vqdmull_high_laneq_s32(int32x4_t a, int32x4_t v) { // CHECK: test_vqdmull_high_laneq_s32 - return vqdmull_high_laneq_s32(a, v, 1); - // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + return vqdmull_high_laneq_s32(a, v, 3); + // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t v) { // CHECK: test_vqdmulh_lane_s16 - return vqdmulh_lane_s16(a, v, 1); - // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vqdmulh_lane_s16(a, v, 3); + // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t v) { // CHECK: test_vqdmulhq_lane_s16 - return vqdmulhq_lane_s16(a, v, 1); - // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vqdmulhq_lane_s16(a, v, 3); + // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t v) { @@ -694,14 +682,14 @@ int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t v) { int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t v) { // CHECK: test_vqrdmulh_lane_s16 - return vqrdmulh_lane_s16(a, v, 1); - // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[1] + return vqrdmulh_lane_s16(a, v, 3); + // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] } int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t v) { // CHECK: test_vqrdmulhq_lane_s16 - return vqrdmulhq_lane_s16(a, v, 1); - // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[1] + return vqrdmulhq_lane_s16(a, v, 3); + // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] } int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t v) { @@ -744,27 +732,21 @@ float64x2_t test_vmulq_lane_f64(float64x2_t a, float64x1_t v) { float32x2_t test_vmul_laneq_f32(float32x2_t a, float32x4_t v) { // CHECK: test_vmul_laneq_f32 - return vmul_laneq_f32(a, v, 1); - // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vmul_laneq_f32(a, v, 3); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } -float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) { - // CHECK: test_vmul_laneq_f64_0 - return vmul_laneq_f64(a, v, 0); - // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +float64x1_t test_vmul_laneq_f64(float64x1_t a, float64x2_t v) { + // CHECK: test_vmul_laneq_f64 + return vmul_laneq_f64(a, v, 1); + // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] } float32x4_t test_vmulq_laneq_f32(float32x4_t a, float32x4_t v) { // CHECK: test_vmulq_laneq_f32 - return vmulq_laneq_f32(a, v, 1); - // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -} - -float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) { - // CHECK: test_vmulq_laneq_f64 - return vmulq_laneq_f64(a, v, 0); - // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] + return vmulq_laneq_f32(a, v, 3); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } float64x2_t 
test_vmulq_laneq_f64(float64x2_t a, float64x2_t v) { @@ -793,20 +775,14 @@ float64x2_t test_vmulxq_lane_f64(float64x2_t a, float64x1_t v) { float32x2_t test_vmulx_laneq_f32(float32x2_t a, float32x4_t v) { // CHECK: test_vmulx_laneq_f32 - return vmulx_laneq_f32(a, v, 1); - // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + return vmulx_laneq_f32(a, v, 3); + // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] } float32x4_t test_vmulxq_laneq_f32(float32x4_t a, float32x4_t v) { // CHECK: test_vmulxq_laneq_f32 - return vmulxq_laneq_f32(a, v, 1); - // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -} - -float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, float64x2_t v) { - // CHECK: test_vmulxq_laneq_f64 - return vmulxq_laneq_f64(a, v, 0); - // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] + return vmulxq_laneq_f32(a, v, 3); + // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) { @@ -815,3 +791,759 @@ float64x2_t test_vmulxq_laneq_f64(float64x2_t a, float64x2_t v) { // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] } +int16x4_t test_vmla_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmla_lane_s16_0 + return vmla_lane_s16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmlaq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlaq_lane_s16_0 + return vmlaq_lane_s16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmla_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmla_lane_s32_0 + return vmla_lane_s32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlaq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlaq_lane_s32_0 + return vmlaq_lane_s32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vmla_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmla_laneq_s16_0 + return vmla_laneq_s16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmlaq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlaq_laneq_s16_0 + return vmlaq_laneq_s16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmla_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmla_laneq_s32_0 + return vmla_laneq_s32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlaq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlaq_laneq_s32_0 + return vmlaq_laneq_s32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vmls_lane_s16_0(int16x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmls_lane_s16_0 + return vmls_lane_s16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmlsq_lane_s16_0(int16x8_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlsq_lane_s16_0 + return vmlsq_lane_s16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmls_lane_s32_0(int32x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmls_lane_s32_0 + return vmls_lane_s32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t 
test_vmlsq_lane_s32_0(int32x4_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlsq_lane_s32_0 + return vmlsq_lane_s32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vmls_laneq_s16_0(int16x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmls_laneq_s16_0 + return vmls_laneq_s16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmlsq_laneq_s16_0(int16x8_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlsq_laneq_s16_0 + return vmlsq_laneq_s16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmls_laneq_s32_0(int32x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmls_laneq_s32_0 + return vmls_laneq_s32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsq_laneq_s32_0(int32x4_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlsq_laneq_s32_0 + return vmlsq_laneq_s32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vmul_lane_s16_0(int16x4_t a, int16x4_t v) { + // CHECK: test_vmul_lane_s16_0 + return vmul_lane_s16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmulq_lane_s16_0(int16x8_t a, int16x4_t v) { + // CHECK: test_vmulq_lane_s16_0 + return vmulq_lane_s16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmul_lane_s32_0(int32x2_t a, int32x2_t v) { + // CHECK: test_vmul_lane_s32_0 + return vmul_lane_s32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmulq_lane_s32_0(int32x4_t a, int32x2_t v) { + // CHECK: test_vmulq_lane_s32_0 + return vmulq_lane_s32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint16x4_t test_vmul_lane_u16_0(uint16x4_t a, uint16x4_t v) { + // CHECK: test_vmul_lane_u16_0 + return vmul_lane_u16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint16x8_t test_vmulq_lane_u16_0(uint16x8_t a, uint16x4_t v) { + // CHECK: test_vmulq_lane_u16_0 + return vmulq_lane_u16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint32x2_t test_vmul_lane_u32_0(uint32x2_t a, uint32x2_t v) { + // CHECK: test_vmul_lane_u32_0 + return vmul_lane_u32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmulq_lane_u32_0(uint32x4_t a, uint32x2_t v) { + // CHECK: test_vmulq_lane_u32_0 + return vmulq_lane_u32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vmul_laneq_s16_0(int16x4_t a, int16x8_t v) { + // CHECK: test_vmul_laneq_s16_0 + return vmul_laneq_s16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vmulq_laneq_s16_0(int16x8_t a, int16x8_t v) { + // CHECK: test_vmulq_laneq_s16_0 + return vmulq_laneq_s16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vmul_laneq_s32_0(int32x2_t a, int32x4_t v) { + // CHECK: test_vmul_laneq_s32_0 + return vmul_laneq_s32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmulq_laneq_s32_0(int32x4_t a, int32x4_t v) { + // CHECK: test_vmulq_laneq_s32_0 + return vmulq_laneq_s32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint16x4_t test_vmul_laneq_u16_0(uint16x4_t a, uint16x8_t v) { + // CHECK: 
test_vmul_laneq_u16_0 + return vmul_laneq_u16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint16x8_t test_vmulq_laneq_u16_0(uint16x8_t a, uint16x8_t v) { + // CHECK: test_vmulq_laneq_u16_0 + return vmulq_laneq_u16(a, v, 0); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint32x2_t test_vmul_laneq_u32_0(uint32x2_t a, uint32x4_t v) { + // CHECK: test_vmul_laneq_u32_0 + return vmul_laneq_u32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmulq_laneq_u32_0(uint32x4_t a, uint32x4_t v) { + // CHECK: test_vmulq_laneq_u32_0 + return vmulq_laneq_u32(a, v, 0); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vfma_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { + // CHECK: test_vfma_lane_f32_0 + return vfma_lane_f32(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vfmaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { + // CHECK: test_vfmaq_lane_f32_0 + return vfmaq_lane_f32(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vfma_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { + // CHECK: test_vfma_laneq_f32_0 + return vfma_laneq_f32(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { + // CHECK: test_vfmaq_laneq_f32_0 + return vfmaq_laneq_f32(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { + // CHECK: test_vfms_lane_f32_0 + return vfms_lane_f32(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { + // CHECK: test_vfmsq_lane_f32_0 + return vfmsq_lane_f32(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { + // CHECK: test_vfms_laneq_f32_0 + return vfms_laneq_f32(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vfmsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { + // CHECK: test_vfmsq_laneq_f32_0 + return vfmsq_laneq_f32(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { + // CHECK: test_vfmaq_laneq_f64_0 + return vfmaq_laneq_f64(a, b, v, 0); + // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float64x2_t test_vfmsq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) { + // CHECK: test_vfmsq_laneq_f64_0 + return vfmsq_laneq_f64(a, b, v, 0); + // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +int32x4_t test_vmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlal_lane_s16_0 + return vmlal_lane_s16(a, b, v, 0); + // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlal_lane_s32_0 + return vmlal_lane_s32(a, b, v, 0); + // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlal_laneq_s16_0 + return 
vmlal_laneq_s16(a, b, v, 0); + // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlal_laneq_s32_0 + return vmlal_laneq_s32(a, b, v, 0); + // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlal_high_lane_s16_0 + return vmlal_high_lane_s16(a, b, v, 0); + // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlal_high_lane_s32_0 + return vmlal_high_lane_s32(a, b, v, 0); + // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlal_high_laneq_s16_0 + return vmlal_high_laneq_s16(a, b, v, 0); + // CHECK: smlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlal_high_laneq_s32_0 + return vmlal_high_laneq_s32(a, b, v, 0); + // CHECK: smlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlsl_lane_s16_0 + return vmlsl_lane_s16(a, b, v, 0); + // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlsl_lane_s32_0 + return vmlsl_lane_s32(a, b, v, 0); + // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlsl_laneq_s16_0 + return vmlsl_laneq_s16(a, b, v, 0); + // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlsl_laneq_s32_0 + return vmlsl_laneq_s32(a, b, v, 0); + // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlsl_high_lane_s16_0 + return vmlsl_high_lane_s16(a, b, v, 0); + // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlsl_high_lane_s32_0 + return vmlsl_high_lane_s32(a, b, v, 0); + // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlsl_high_laneq_s16_0 + return vmlsl_high_laneq_s16(a, b, v, 0); + // CHECK: smlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlsl_high_laneq_s32_0 + return vmlsl_high_laneq_s32(a, b, v, 0); + // CHECK: smlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlal_lane_u16_0 + return vmlal_lane_u16(a, b, v, 0); + // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlal_lane_u32_0 + return vmlal_lane_u32(a, b, v, 0); + // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_laneq_u16_0(int32x4_t a, 
int16x4_t b, int16x8_t v) { + // CHECK: test_vmlal_laneq_u16_0 + return vmlal_laneq_u16(a, b, v, 0); + // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlal_laneq_u32_0 + return vmlal_laneq_u32(a, b, v, 0); + // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlal_high_lane_u16_0 + return vmlal_high_lane_u16(a, b, v, 0); + // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlal_high_lane_u32_0 + return vmlal_high_lane_u32(a, b, v, 0); + // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlal_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlal_high_laneq_u16_0 + return vmlal_high_laneq_u16(a, b, v, 0); + // CHECK: umlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlal_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlal_high_laneq_u32_0 + return vmlal_high_laneq_u32(a, b, v, 0); + // CHECK: umlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_lane_u16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vmlsl_lane_u16_0 + return vmlsl_lane_u16(a, b, v, 0); + // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_lane_u32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vmlsl_lane_u32_0 + return vmlsl_lane_u32(a, b, v, 0); + // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_laneq_u16_0(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK: test_vmlsl_laneq_u16_0 + return vmlsl_laneq_u16(a, b, v, 0); + // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_laneq_u32_0(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK: test_vmlsl_laneq_u32_0 + return vmlsl_laneq_u32(a, b, v, 0); + // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_high_lane_u16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vmlsl_high_lane_u16_0 + return vmlsl_high_lane_u16(a, b, v, 0); + // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_high_lane_u32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vmlsl_high_lane_u32_0 + return vmlsl_high_lane_u32(a, b, v, 0); + // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmlsl_high_laneq_u16_0(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK: test_vmlsl_high_laneq_u16_0 + return vmlsl_high_laneq_u16(a, b, v, 0); + // CHECK: umlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmlsl_high_laneq_u32_0(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK: test_vmlsl_high_laneq_u32_0 + return vmlsl_high_laneq_u32(a, b, v, 0); + // CHECK: umlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmull_lane_s16_0(int16x4_t a, int16x4_t v) { + // CHECK: test_vmull_lane_s16_0 + return vmull_lane_s16(a, v, 0); + // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmull_lane_s32_0(int32x2_t a, int32x2_t v) { + // CHECK: test_vmull_lane_s32_0 + return vmull_lane_s32(a, v, 0); + // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + 
+uint32x4_t test_vmull_lane_u16_0(uint16x4_t a, uint16x4_t v) { + // CHECK: test_vmull_lane_u16_0 + return vmull_lane_u16(a, v, 0); + // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmull_lane_u32_0(uint32x2_t a, uint32x2_t v) { + // CHECK: test_vmull_lane_u32_0 + return vmull_lane_u32(a, v, 0); + // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { + // CHECK: test_vmull_high_lane_s16_0 + return vmull_high_lane_s16(a, v, 0); + // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { + // CHECK: test_vmull_high_lane_s32_0 + return vmull_high_lane_s32(a, v, 0); + // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmull_high_lane_u16_0(uint16x8_t a, uint16x4_t v) { + // CHECK: test_vmull_high_lane_u16_0 + return vmull_high_lane_u16(a, v, 0); + // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmull_high_lane_u32_0(uint32x4_t a, uint32x2_t v) { + // CHECK: test_vmull_high_lane_u32_0 + return vmull_high_lane_u32(a, v, 0); + // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmull_laneq_s16_0(int16x4_t a, int16x8_t v) { + // CHECK: test_vmull_laneq_s16_0 + return vmull_laneq_s16(a, v, 0); + // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmull_laneq_s32_0(int32x2_t a, int32x4_t v) { + // CHECK: test_vmull_laneq_s32_0 + return vmull_laneq_s32(a, v, 0); + // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmull_laneq_u16_0(uint16x4_t a, uint16x8_t v) { + // CHECK: test_vmull_laneq_u16_0 + return vmull_laneq_u16(a, v, 0); + // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmull_laneq_u32_0(uint32x2_t a, uint32x4_t v) { + // CHECK: test_vmull_laneq_u32_0 + return vmull_laneq_u32(a, v, 0); + // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { + // CHECK: test_vmull_high_laneq_s16_0 + return vmull_high_laneq_s16(a, v, 0); + // CHECK: smull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { + // CHECK: test_vmull_high_laneq_s32_0 + return vmull_high_laneq_s32(a, v, 0); + // CHECK: smull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmull_high_laneq_u16_0(uint16x8_t a, uint16x8_t v) { + // CHECK: test_vmull_high_laneq_u16_0 + return vmull_high_laneq_u16(a, v, 0); + // CHECK: umull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint64x2_t test_vmull_high_laneq_u32_0(uint32x4_t a, uint32x4_t v) { + // CHECK: test_vmull_high_laneq_u32_0 + return vmull_high_laneq_u32(a, v, 0); + // CHECK: umull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlal_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vqdmlal_lane_s16_0 + return vqdmlal_lane_s16(a, b, v, 0); + // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlal_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vqdmlal_lane_s32_0 + return vqdmlal_lane_s32(a, b, v, 0); + // CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlal_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: 
test_vqdmlal_high_lane_s16_0 + return vqdmlal_high_lane_s16(a, b, v, 0); + // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlal_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vqdmlal_high_lane_s32_0 + return vqdmlal_high_lane_s32(a, b, v, 0); + // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlsl_lane_s16_0(int32x4_t a, int16x4_t b, int16x4_t v) { + // CHECK: test_vqdmlsl_lane_s16_0 + return vqdmlsl_lane_s16(a, b, v, 0); + // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlsl_lane_s32_0(int64x2_t a, int32x2_t b, int32x2_t v) { + // CHECK: test_vqdmlsl_lane_s32_0 + return vqdmlsl_lane_s32(a, b, v, 0); + // CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlsl_high_lane_s16_0(int32x4_t a, int16x8_t b, int16x4_t v) { + // CHECK: test_vqdmlsl_high_lane_s16_0 + return vqdmlsl_high_lane_s16(a, b, v, 0); + // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlsl_high_lane_s32_0(int64x2_t a, int32x4_t b, int32x2_t v) { + // CHECK: test_vqdmlsl_high_lane_s32_0 + return vqdmlsl_high_lane_s32(a, b, v, 0); + // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmull_lane_s16_0(int16x4_t a, int16x4_t v) { + // CHECK: test_vqdmull_lane_s16_0 + return vqdmull_lane_s16(a, v, 0); + // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmull_lane_s32_0(int32x2_t a, int32x2_t v) { + // CHECK: test_vqdmull_lane_s32_0 + return vqdmull_lane_s32(a, v, 0); + // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmull_laneq_s16_0(int16x4_t a, int16x8_t v) { + // CHECK: test_vqdmull_laneq_s16_0 + return vqdmull_laneq_s16(a, v, 0); + // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmull_laneq_s32_0(int32x2_t a, int32x4_t v) { + // CHECK: test_vqdmull_laneq_s32_0 + return vqdmull_laneq_s32(a, v, 0); + // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmull_high_lane_s16_0(int16x8_t a, int16x4_t v) { + // CHECK: test_vqdmull_high_lane_s16_0 + return vqdmull_high_lane_s16(a, v, 0); + // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmull_high_lane_s32_0(int32x4_t a, int32x2_t v) { + // CHECK: test_vqdmull_high_lane_s32_0 + return vqdmull_high_lane_s32(a, v, 0); + // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmull_high_laneq_s16_0(int16x8_t a, int16x8_t v) { + // CHECK: test_vqdmull_high_laneq_s16_0 + return vqdmull_high_laneq_s16(a, v, 0); + // CHECK: sqdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmull_high_laneq_s32_0(int32x4_t a, int32x4_t v) { + // CHECK: test_vqdmull_high_laneq_s32_0 + return vqdmull_high_laneq_s32(a, v, 0); + // CHECK: sqdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vqdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { + // CHECK: test_vqdmulh_lane_s16_0 + return vqdmulh_lane_s16(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vqdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { + // CHECK: test_vqdmulhq_lane_s16_0 + return vqdmulhq_lane_s16(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vqdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { + 
// CHECK: test_vqdmulh_lane_s32_0 + return vqdmulh_lane_s32(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { + // CHECK: test_vqdmulhq_lane_s32_0 + return vqdmulhq_lane_s32(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vqrdmulh_lane_s16_0(int16x4_t a, int16x4_t v) { + // CHECK: test_vqrdmulh_lane_s16_0 + return vqrdmulh_lane_s16(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vqrdmulhq_lane_s16_0(int16x8_t a, int16x4_t v) { + // CHECK: test_vqrdmulhq_lane_s16_0 + return vqrdmulhq_lane_s16(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vqrdmulh_lane_s32_0(int32x2_t a, int32x2_t v) { + // CHECK: test_vqrdmulh_lane_s32_0 + return vqrdmulh_lane_s32(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqrdmulhq_lane_s32_0(int32x4_t a, int32x2_t v) { + // CHECK: test_vqrdmulhq_lane_s32_0 + return vqrdmulhq_lane_s32(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vmul_lane_f32_0(float32x2_t a, float32x2_t v) { + // CHECK: test_vmul_lane_f32_0 + return vmul_lane_f32(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmulq_lane_f32_0(float32x4_t a, float32x2_t v) { + // CHECK: test_vmulq_lane_f32_0 + return vmulq_lane_f32(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vmul_laneq_f32_0(float32x2_t a, float32x4_t v) { + // CHECK: test_vmul_laneq_f32_0 + return vmul_laneq_f32(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) { + // CHECK: test_vmul_laneq_f64_0 + return vmul_laneq_f64(a, v, 0); + // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +float32x4_t test_vmulq_laneq_f32_0(float32x4_t a, float32x4_t v) { + // CHECK: test_vmulq_laneq_f32_0 + return vmulq_laneq_f32(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vmulq_laneq_f64_0(float64x2_t a, float64x2_t v) { + // CHECK: test_vmulq_laneq_f64_0 + return vmulq_laneq_f64(a, v, 0); + // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float32x2_t test_vmulx_lane_f32_0(float32x2_t a, float32x2_t v) { + // CHECK: test_vmulx_lane_f32_0 + return vmulx_lane_f32(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmulxq_lane_f32_0(float32x4_t a, float32x2_t v) { + // CHECK: test_vmulxq_lane_f32_0 + return vmulxq_lane_f32(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vmulxq_lane_f64_0(float64x2_t a, float64x1_t v) { + // CHECK: test_vmulxq_lane_f64_0 + return vmulxq_lane_f64(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +} + +float32x2_t test_vmulx_laneq_f32_0(float32x2_t a, float32x4_t v) { + // CHECK: test_vmulx_laneq_f32_0 + return vmulx_laneq_f32(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmulxq_laneq_f32_0(float32x4_t a, float32x4_t v) { + // CHECK: test_vmulxq_laneq_f32_0 + return vmulxq_laneq_f32(a, v, 0); + // CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float64x2_t test_vmulxq_laneq_f64_0(float64x2_t a, 
float64x2_t v) {
+  // CHECK: test_vmulxq_laneq_f64_0
+  return vmulxq_laneq_f64(a, v, 0);
+  // CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
+}
+
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp
index 2762ecb438..f672c39888 100644
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -2210,10 +2210,17 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
   return Flags.getFlags();
 }
+// We don't check 'a' in this function, because for builtin function the
+// argument matching to 'a' uses a vector type splatted from a scalar type.
 static bool ProtoHasScalar(const std::string proto) {
   return (proto.find('s') != std::string::npos
-          || proto.find('r') != std::string::npos);
+          || proto.find('z') != std::string::npos
+          || proto.find('r') != std::string::npos
+          || proto.find('b') != std::string::npos
+          || proto.find('$') != std::string::npos
+          || proto.find('y') != std::string::npos
+          || proto.find('o') != std::string::npos);
 }
 // Generate the definition for this intrinsic, e.g. __builtin_neon_cls(a)
@@ -2783,6 +2790,8 @@ NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
     PrintFatalError(R->getLoc(), "Builtin has no class kind");
   ClassKind ck = ClassMap[R->getSuperClasses()[1]];
+  if (!ProtoHasScalar(Proto))
+    ck = ClassB;
   // Do not include AArch64 range checks if not generating code for AArch64.
   bool isA64 = R->getValueAsBit("isA64");
@@ -2821,17 +2830,15 @@ NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
       rangestr += "u = " + utostr(RangeScalarShiftImm(Proto[immPos - 1], TypeVec[ti]));
-    } else if (!ProtoHasScalar(Proto)) {
+    } else if (R->getValueAsBit("isShift")) {
       // Builtins which are overloaded by type will need to have their upper
       // bound computed at Sema time based on the type constant.
-      ck = ClassB;
-      if (R->getValueAsBit("isShift")) {
-        shiftstr = ", true";
+      shiftstr = ", true";
+
+      // Right shifts have an 'r' in the name, left shifts do not.
+      if (name.find('r') != std::string::npos)
+        rangestr = "l = 1; ";
-
-        // Right shifts have an 'r' in the name, left shifts do not.
-        if (name.find('r') != std::string::npos)
-          rangestr = "l = 1; ";
-      }
       rangestr += "u = RFT(TV" + shiftstr + ")";
     } else {
       // The immediate generally refers to a lane in the preceding argument.
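
Note (editorial, not part of the patch): the test updates above move each lane-index
immediate from 1 up to the largest lane the indexed vector actually has (for example
3 for an int16x4_t or int32x4_t, 7 for an int16x8_t) and add _0 variants that pin the
lowest lane, while the NeonEmitter change adjusts how the generated Sema range check
classifies these intrinsics so the upper bound follows the type of the vector being
indexed. A minimal sketch of the boundary this enforces, assuming a standard AArch64
arm_neon.h; the function names are illustrative and the commented call only marks
where a compile-time diagnostic is expected:

    #include <arm_neon.h>

    /* int16x4_t has lanes 0..3, so 3 is the highest immediate accepted here. */
    int16x4_t mla_max_lane(int16x4_t a, int16x4_t b, int16x4_t v) {
      return vmla_lane_s16(a, b, v, 3);
    }

    /* The laneq form indexes a 128-bit int16x8_t, so lanes 0..7 are accepted. */
    int16x4_t mla_max_laneq(int16x4_t a, int16x4_t b, int16x8_t v) {
      return vmla_laneq_s16(a, b, v, 7);
    }

    /* An out-of-range immediate, e.g. vmla_lane_s16(a, b, v, 4), should now be
       rejected by the constant-range check at compile time rather than silently
       selecting a nonexistent lane. */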