From 170a24b0b643f0ad3a59e107e52f1bae78b9616d Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Thu, 13 Jun 2019 15:54:20 +0000 Subject: [PATCH] [NFC][CodeGen] Add unary FNeg tests to X86/avx512-intrinsics-fast-isel.ll Patch 2 of n. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363275 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/avx512-intrinsics-fast-isel.ll | 757 +++++++++++++++++- 1 file changed, 756 insertions(+), 1 deletion(-) diff --git a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll index 79ac74d72c9..6f790845a24 100644 --- a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -4705,6 +4705,27 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_maskz_fmsubadd_round_pd_unary_fneg(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { +; X86-LABEL: test_mm512_maskz_fmsubadd_round_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_maskz_fmsubadd_round_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} +; X64-NEXT: retq +entry: + %neg = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %neg, i32 8) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer + ret <8 x double> %2 +} + define <8 x double> @test_mm512_fmaddsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; CHECK-LABEL: test_mm512_fmaddsub_pd: ; CHECK: # %bb.0: # %entry @@ -4718,6 +4739,19 @@ entry: ret <8 x double> %3 } +define <8 x double> @test_mm512_fmaddsub_pd_unary_fneg(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { +; CHECK-LABEL: test_mm512_fmaddsub_pd_unary_fneg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2 +; CHECK-NEXT: ret{{[l|q]}} +entry: + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10 + %1 = fneg <8 x double> %__C + %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10 + %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + ret <8 x double> %3 +} + define <8 x double> @test_mm512_mask_fmaddsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fmaddsub_pd: ; X86: # %bb.0: # %entry @@ -4741,6 +4775,29 @@ entry: ret <8 x double> %5 } +define <8 x double> @test_mm512_mask_fmaddsub_pd_unary_fneg(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { +; X86-LABEL: test_mm512_mask_fmaddsub_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmaddsub132pd {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fmaddsub_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmaddsub132pd {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2 +; X64-NEXT: retq +entry: + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10 + %1 = fneg <8 x double> %__C + %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, 
<8 x double> %1) #10 + %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + %4 = bitcast i8 %__U to <8 x i1> + %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__A + ret <8 x double> %5 +} + define <8 x double> @test_mm512_mask3_fmaddsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmaddsub_pd: ; X86: # %bb.0: # %entry @@ -4766,6 +4823,31 @@ entry: ret <8 x double> %5 } +define <8 x double> @test_mm512_mask3_fmaddsub_pd_unary_fneg(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fmaddsub_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmaddsub231pd {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2 +; X86-NEXT: vmovapd %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fmaddsub_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmaddsub231pd {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2 +; X64-NEXT: vmovapd %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10 + %1 = fneg <8 x double> %__C + %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10 + %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + %4 = bitcast i8 %__U to <8 x i1> + %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__C + ret <8 x double> %5 +} + define <8 x double> @test_mm512_maskz_fmaddsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fmaddsub_pd: ; X86: # %bb.0: # %entry @@ -4789,6 +4871,29 @@ entry: ret <8 x double> %5 } +define <8 x double> @test_mm512_maskz_fmaddsub_pd_unary_fneg(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { +; X86-LABEL: test_mm512_maskz_fmaddsub_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_maskz_fmaddsub_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2 +; X64-NEXT: retq +entry: + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10 + %1 = fneg <8 x double> %__C + %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10 + %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> + %4 = bitcast i8 %__U to <8 x i1> + %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> zeroinitializer + ret <8 x double> %5 +} + define <8 x double> @test_mm512_fmsubadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; CHECK-LABEL: test_mm512_fmsubadd_pd: ; CHECK: # %bb.0: # %entry @@ -4802,6 +4907,19 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_fmsubadd_pd_unary_fneg(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { +; CHECK-LABEL: test_mm512_fmsubadd_pd_unary_fneg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmsubadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2 +; CHECK-NEXT: ret{{[l|q]}} +entry: + %neg.i = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %neg.i) #10 + %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x 
double> %__C) #10 + %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> + ret <8 x double> %2 +} + define <8 x double> @test_mm512_mask_fmsubadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fmsubadd_pd: ; X86: # %bb.0: # %entry @@ -4825,6 +4943,29 @@ entry: ret <8 x double> %4 } +define <8 x double> @test_mm512_mask_fmsubadd_pd_unary_fneg(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { +; X86-LABEL: test_mm512_mask_fmsubadd_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd132pd {{.*#+}} zmm0 = (zmm0 * zmm1) -/+ zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fmsubadd_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd132pd {{.*#+}} zmm0 = (zmm0 * zmm1) -/+ zmm2 +; X64-NEXT: retq +entry: + %neg.i = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %neg.i) #10 + %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10 + %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__A + ret <8 x double> %4 +} + define <8 x double> @test_mm512_maskz_fmsubadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fmsubadd_pd: ; X86: # %bb.0: # %entry @@ -4848,6 +4989,29 @@ entry: ret <8 x double> %4 } +define <8 x double> @test_mm512_maskz_fmsubadd_pd_unary_fneg(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { +; X86-LABEL: test_mm512_maskz_fmsubadd_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_maskz_fmsubadd_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2 +; X64-NEXT: retq +entry: + %neg.i = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %neg.i) #10 + %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10 + %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer + ret <8 x double> %4 +} + define <16 x float> @test_mm512_fmaddsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { ; CHECK-LABEL: test_mm512_fmaddsub_round_ps: ; CHECK: # %bb.0: # %entry @@ -4940,6 +5104,24 @@ entry: ret <16 x float> %0 } +define <16 x float> @test_mm512_fmsubadd_round_ps_unary_fneg(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { +; X86-LABEL: test_mm512_fmsubadd_round_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: vpxord {{\.LCPI.*}}{1to16}, %zmm2, %zmm2 +; X86-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_fmsubadd_round_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: vpxord {{.*}}(%rip){1to16}, %zmm2, %zmm2 +; X64-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 +; X64-NEXT: retq +entry: + %neg = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x 
float> %__B, <16 x float> %neg, i32 8) + ret <16 x float> %0 +} + define <16 x float> @test_mm512_mask_fmsubadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { ; X86-LABEL: test_mm512_mask_fmsubadd_round_ps: ; X86: # %bb.0: # %entry @@ -4961,6 +5143,27 @@ entry: ret <16 x float> %2 } +define <16 x float> @test_mm512_mask_fmsubadd_round_ps_unary_fneg(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { +; X86-LABEL: test_mm512_mask_fmsubadd_round_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fmsubadd_round_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} +; X64-NEXT: retq +entry: + %neg = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %neg, i32 8) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + ret <16 x float> %2 +} + define <16 x float> @test_mm512_maskz_fmsubadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { ; X86-LABEL: test_mm512_maskz_fmsubadd_round_ps: ; X86: # %bb.0: # %entry @@ -4982,6 +5185,27 @@ entry: ret <16 x float> %2 } +define <16 x float> @test_mm512_maskz_fmsubadd_round_ps_unary_fneg(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { +; X86-LABEL: test_mm512_maskz_fmsubadd_round_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_maskz_fmsubadd_round_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} +; X64-NEXT: retq +entry: + %neg = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %neg, i32 8) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer + ret <16 x float> %2 +} + define <16 x float> @test_mm512_fmaddsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { ; CHECK-LABEL: test_mm512_fmaddsub_ps: ; CHECK: # %bb.0: # %entry @@ -4995,6 +5219,19 @@ entry: ret <16 x float> %3 } +define <16 x float> @test_mm512_fmaddsub_ps_unary_fneg(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { +; CHECK-LABEL: test_mm512_fmaddsub_ps_unary_fneg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2 +; CHECK-NEXT: ret{{[l|q]}} +entry: + %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10 + %1 = fneg <16 x float> %__C + %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10 + %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + ret <16 x float> %3 +} + define <16 x float> @test_mm512_mask_fmaddsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { ; X86-LABEL: test_mm512_mask_fmaddsub_ps: ; X86: # %bb.0: # %entry @@ -5018,6 +5255,29 @@ entry: ret <16 x float> %5 } +define <16 x float> @test_mm512_mask_fmaddsub_ps_unary_fneg(<16 x float> %__A, i16 
zeroext %__U, <16 x float> %__B, <16 x float> %__C) { +; X86-LABEL: test_mm512_mask_fmaddsub_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fmaddsub_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmaddsub132ps {{.*#+}} zmm0 = (zmm0 * zmm1) +/- zmm2 +; X64-NEXT: retq +entry: + %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10 + %1 = fneg <16 x float> %__C + %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10 + %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + %4 = bitcast i16 %__U to <16 x i1> + %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__A + ret <16 x float> %5 +} + define <16 x float> @test_mm512_mask3_fmaddsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmaddsub_ps: ; X86: # %bb.0: # %entry @@ -5043,6 +5303,31 @@ entry: ret <16 x float> %5 } +define <16 x float> @test_mm512_mask3_fmaddsub_ps_unary_fneg(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fmaddsub_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmaddsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2 +; X86-NEXT: vmovaps %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fmaddsub_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmaddsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) +/- zmm2 +; X64-NEXT: vmovaps %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10 + %1 = fneg <16 x float> %__C + %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10 + %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + %4 = bitcast i16 %__U to <16 x i1> + %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__C + ret <16 x float> %5 +} + define <16 x float> @test_mm512_maskz_fmaddsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { ; X86-LABEL: test_mm512_maskz_fmaddsub_ps: ; X86: # %bb.0: # %entry @@ -5066,6 +5351,29 @@ entry: ret <16 x float> %5 } +define <16 x float> @test_mm512_maskz_fmaddsub_ps_unary_fneg(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { +; X86-LABEL: test_mm512_maskz_fmaddsub_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_maskz_fmaddsub_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2 +; X64-NEXT: retq +entry: + %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10 + %1 = fneg <16 x float> %__C + %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10 + %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> + %4 = bitcast i16 %__U to <16 x i1> + %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> zeroinitializer + ret <16 x float> %5 +} + define 
<16 x float> @test_mm512_fmsubadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { ; CHECK-LABEL: test_mm512_fmsubadd_ps: ; CHECK: # %bb.0: # %entry @@ -5079,7 +5387,20 @@ entry: ret <16 x float> %2 } -define <16 x float> @test_mm512_mask_fmsubadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { +define <16 x float> @test_mm512_fmsubadd_ps_unary_fneg(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { +; CHECK-LABEL: test_mm512_fmsubadd_ps_unary_fneg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfmsubadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2 +; CHECK-NEXT: ret{{[l|q]}} +entry: + %neg.i = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %neg.i) #10 + %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10 + %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> + ret <16 x float> %2 +} + +define <16 x float> @test_mm512_mask_fmsubadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { ; X86-LABEL: test_mm512_mask_fmsubadd_ps: ; X86: # %bb.0: # %entry ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax @@ -5102,6 +5423,29 @@ entry: ret <16 x float> %4 } +define <16 x float> @test_mm512_mask_fmsubadd_ps_unary_fneg(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { +; X86-LABEL: test_mm512_mask_fmsubadd_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) -/+ zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fmsubadd_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) -/+ zmm2 +; X64-NEXT: retq +entry: + %neg.i = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %neg.i) #10 + %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10 + %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__A + ret <16 x float> %4 +} + define <16 x float> @test_mm512_maskz_fmsubadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { ; X86-LABEL: test_mm512_maskz_fmsubadd_ps: ; X86: # %bb.0: # %entry @@ -5125,6 +5469,29 @@ entry: ret <16 x float> %4 } +define <16 x float> @test_mm512_maskz_fmsubadd_ps_unary_fneg(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) { +; X86-LABEL: test_mm512_maskz_fmsubadd_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_maskz_fmsubadd_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2 +; X64-NEXT: retq +entry: + %neg.i = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %neg.i) #10 + %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10 + %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> 
zeroinitializer + ret <16 x float> %4 +} + define <8 x double> @test_mm512_mask3_fmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsub_round_pd: ; X86: # %bb.0: # %entry @@ -5148,6 +5515,29 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_mask3_fmsub_round_pd_unary_fneg(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fmsub_round_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X86-NEXT: vmovapd %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fmsub_round_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X64-NEXT: vmovapd %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %neg, i32 8) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + ret <8 x double> %2 +} + define <8 x double> @test_mm512_mask3_fmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsub_pd: ; X86: # %bb.0: # %entry @@ -5171,6 +5561,29 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_mask3_fmsub_pd_unary_fneg(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fmsub_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsub231pd {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2 +; X86-NEXT: vmovapd %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fmsub_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsub231pd {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2 +; X64-NEXT: vmovapd %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg.i = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %neg.i) #10 + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + ret <8 x double> %2 +} + define <16 x float> @test_mm512_mask3_fmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsub_round_ps: ; X86: # %bb.0: # %entry @@ -5194,6 +5607,29 @@ entry: ret <16 x float> %2 } +define <16 x float> @test_mm512_mask3_fmsub_round_ps_unary_fneg(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fmsub_round_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X86-NEXT: vmovaps %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fmsub_round_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X64-NEXT: vmovaps %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %neg, i32 8) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + ret <16 x float> %2 +} + define <16 x 
float> @test_mm512_mask3_fmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsub_ps: ; X86: # %bb.0: # %entry @@ -5217,6 +5653,29 @@ entry: ret <16 x float> %2 } +define <16 x float> @test_mm512_mask3_fmsub_ps_unary_fneg(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fmsub_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2 +; X86-NEXT: vmovaps %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fmsub_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsub231ps {{.*#+}} zmm2 = (zmm0 * zmm1) - zmm2 +; X64-NEXT: vmovaps %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg.i = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %neg.i) #10 + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + ret <16 x float> %2 +} + define <8 x double> @test_mm512_mask3_fmsubadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsubadd_round_pd: ; X86: # %bb.0: # %entry @@ -5240,6 +5699,29 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_mask3_fmsubadd_round_pd_unary_fneg(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fmsubadd_round_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X86-NEXT: vmovapd %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fmsubadd_round_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X64-NEXT: vmovapd %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %neg, i32 8) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + ret <8 x double> %2 +} + define <8 x double> @test_mm512_mask3_fmsubadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsubadd_pd: ; X86: # %bb.0: # %entry @@ -5265,6 +5747,31 @@ entry: ret <8 x double> %4 } +define <8 x double> @test_mm512_mask3_fmsubadd_pd_unary_fneg(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fmsubadd_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd231pd {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2 +; X86-NEXT: vmovapd %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fmsubadd_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd231pd {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2 +; X64-NEXT: vmovapd %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg.i = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %neg.i) #10 + %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10 + %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> + %3 = bitcast 
i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__C + ret <8 x double> %4 +} + define <16 x float> @test_mm512_mask3_fmsubadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsubadd_round_ps: ; X86: # %bb.0: # %entry @@ -5288,6 +5795,29 @@ entry: ret <16 x float> %2 } +define <16 x float> @test_mm512_mask3_fmsubadd_round_ps_unary_fneg(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fmsubadd_round_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X86-NEXT: vmovaps %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fmsubadd_round_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X64-NEXT: vmovaps %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %neg, i32 8) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + ret <16 x float> %2 +} + define <16 x float> @test_mm512_mask3_fmsubadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsubadd_ps: ; X86: # %bb.0: # %entry @@ -5313,6 +5843,31 @@ entry: ret <16 x float> %4 } +define <16 x float> @test_mm512_mask3_fmsubadd_ps_unary_fneg(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fmsubadd_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfmsubadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2 +; X86-NEXT: vmovaps %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fmsubadd_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfmsubadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) -/+ zmm2 +; X64-NEXT: vmovaps %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg.i = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %neg.i) #10 + %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10 + %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__C + ret <16 x float> %4 +} + define <8 x double> @test_mm512_mask_fnmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fnmadd_round_pd: ; X86: # %bb.0: # %entry @@ -5334,6 +5889,27 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_mask_fnmadd_round_pd_unary_fneg(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { +; X86-LABEL: test_mm512_mask_fnmadd_round_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfnmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fnmadd_round_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfnmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} +; X64-NEXT: retq +entry: + %neg = fneg <8 x double> %__A + %0 = tail call <8 x double> 
@llvm.x86.avx512.vfmadd.pd.512(<8 x double> %neg, <8 x double> %__B, <8 x double> %__C, i32 8) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + ret <8 x double> %2 +} + define <8 x double> @test_mm512_mask_fnmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fnmadd_pd: ; X86: # %bb.0: # %entry @@ -5355,6 +5931,27 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_mask_fnmadd_pd_unary_fneg(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { +; X86-LABEL: test_mm512_mask_fnmadd_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfnmadd132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fnmadd_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfnmadd132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2 +; X64-NEXT: retq +entry: + %neg.i = fneg <8 x double> %__A + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %neg.i, <8 x double> %__B, <8 x double> %__C) #10 + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + ret <8 x double> %2 +} + define <16 x float> @test_mm512_mask_fnmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { ; X86-LABEL: test_mm512_mask_fnmadd_round_ps: ; X86: # %bb.0: # %entry @@ -5376,6 +5973,27 @@ entry: ret <16 x float> %2 } +define <16 x float> @test_mm512_mask_fnmadd_round_ps_unary_fneg(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { +; X86-LABEL: test_mm512_mask_fnmadd_round_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfnmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fnmadd_round_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfnmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} +; X64-NEXT: retq +entry: + %neg = fneg <16 x float> %__A + %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %neg, <16 x float> %__B, <16 x float> %__C, i32 8) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + ret <16 x float> %2 +} + define <16 x float> @test_mm512_mask_fnmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { ; X86-LABEL: test_mm512_mask_fnmadd_ps: ; X86: # %bb.0: # %entry @@ -5397,6 +6015,27 @@ entry: ret <16 x float> %2 } +define <16 x float> @test_mm512_mask_fnmadd_ps_unary_fneg(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { +; X86-LABEL: test_mm512_mask_fnmadd_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fnmadd_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfnmadd132ps {{.*#+}} zmm0 = -(zmm0 * zmm1) + zmm2 +; X64-NEXT: retq +entry: + %neg.i = fneg <16 x float> %__A + %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %neg.i, <16 x float> %__B, <16 x float> %__C) #10 + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A + ret <16 x float> %2 +} + define <8 x double> @test_mm512_mask_fnmsub_round_pd(<8 x double> 
%__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fnmsub_round_pd: ; X86: # %bb.0: # %entry @@ -5419,6 +6058,28 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_mask_fnmsub_round_pd_unary_fneg(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { +; X86-LABEL: test_mm512_mask_fnmsub_round_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fnmsub_round_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} +; X64-NEXT: retq +entry: + %neg = fneg <8 x double> %__B + %neg1 = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %neg, <8 x double> %neg1, i32 8) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + ret <8 x double> %2 +} + define <8 x double> @test_mm512_mask3_fnmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fnmsub_round_pd: ; X86: # %bb.0: # %entry @@ -5443,6 +6104,30 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_mask3_fnmsub_round_pd_unary_fneg(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fnmsub_round_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfnmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X86-NEXT: vmovapd %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fnmsub_round_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfnmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X64-NEXT: vmovapd %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg = fneg <8 x double> %__B + %neg1 = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %neg, <8 x double> %neg1, i32 8) + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + ret <8 x double> %2 +} + define <8 x double> @test_mm512_mask_fnmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fnmsub_pd: ; X86: # %bb.0: # %entry @@ -5465,6 +6150,28 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_mask_fnmsub_pd_unary_fneg(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { +; X86-LABEL: test_mm512_mask_fnmsub_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask_fnmsub_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfnmsub132pd {{.*#+}} zmm0 = -(zmm0 * zmm1) - zmm2 +; X64-NEXT: retq +entry: + %neg.i = fneg <8 x double> %__B + %neg2.i = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %neg.i, <8 x double> %neg2.i) #10 + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A + ret <8 x double> %2 +} + define <8 x double> @test_mm512_mask3_fnmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: 
test_mm512_mask3_fnmsub_pd: ; X86: # %bb.0: # %entry @@ -5489,6 +6196,30 @@ entry: ret <8 x double> %2 } +define <8 x double> @test_mm512_mask3_fnmsub_pd_unary_fneg(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fnmsub_pd_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfnmsub231pd {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2 +; X86-NEXT: vmovapd %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fnmsub_pd_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfnmsub231pd {{.*#+}} zmm2 = -(zmm0 * zmm1) - zmm2 +; X64-NEXT: vmovapd %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg.i = fneg <8 x double> %__B + %neg2.i = fneg <8 x double> %__C + %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %neg.i, <8 x double> %neg2.i) #10 + %1 = bitcast i8 %__U to <8 x i1> + %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C + ret <8 x double> %2 +} + define <16 x float> @test_mm512_mask_fnmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { ; X86-LABEL: test_mm512_mask_fnmsub_round_ps: ; X86: # %bb.0: # %entry @@ -5535,6 +6266,30 @@ entry: ret <16 x float> %2 } +define <16 x float> @test_mm512_mask3_fnmsub_round_ps_unary_fneg(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) { +; X86-LABEL: test_mm512_mask3_fnmsub_round_ps_unary_fneg: +; X86: # %bb.0: # %entry +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kmovw %eax, %k1 +; X86-NEXT: vfnmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X86-NEXT: vmovaps %zmm2, %zmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_mm512_mask3_fnmsub_round_ps_unary_fneg: +; X64: # %bb.0: # %entry +; X64-NEXT: kmovw %edi, %k1 +; X64-NEXT: vfnmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} +; X64-NEXT: vmovaps %zmm2, %zmm0 +; X64-NEXT: retq +entry: + %neg = fneg <16 x float> %__B + %neg1 = fneg <16 x float> %__C + %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %neg, <16 x float> %neg1, i32 8) + %1 = bitcast i16 %__U to <16 x i1> + %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C + ret <16 x float> %2 +} + define <16 x float> @test_mm512_mask_fnmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) { ; X86-LABEL: test_mm512_mask_fnmsub_ps: ; X86: # %bb.0: # %entry -- 2.50.1
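Note (illustrative, not part of the patch): the new *_unary_fneg test variants above mirror the existing tests but build the negated FMA operand with LLVM's unary fneg instruction instead of the legacy binary idiom, an fsub from a -0.0 splat. A minimal standalone IR sketch of the two forms, using hypothetical function names, assuming an LLVM version with unary fneg (8.0+):

define <8 x double> @negate_binary_idiom(<8 x double> %x) {
  ; legacy canonical negation: subtract from a splat of -0.0
  %neg = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
  ret <8 x double> %neg
}

define <8 x double> @negate_unary_fneg(<8 x double> %x) {
  ; unary fneg: the form exercised by the *_unary_fneg tests in this patch
  %neg = fneg <8 x double> %x
  ret <8 x double> %neg
}

In most of the tests above the unary form already selects the same masked vfmaddsub/vfmsubadd/vfnmadd/vfnmsub instructions as the existing binary-idiom tests; the added coverage also captures the cases where it does not yet (for example the vpxord + vfmaddsub213ps sequence in test_mm512_fmsubadd_round_ps_unary_fneg), so later codegen fixes show up as test diffs.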