From f9f3d363763f94ab98d5c2865ecd7107b01c7672 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 6 Nov 2017 05:48:26 +0000 Subject: [PATCH] [X86] Use EVEX encoded intrinsics for legacy FMA intrinsics when possible. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317454 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFMA.td | 80 +++++++++++++------------- lib/Target/X86/X86IntrinsicsInfo.h | 8 +++ test/CodeGen/X86/fma-intrinsics-x86.ll | 32 +++++------ 3 files changed, 65 insertions(+), 55 deletions(-) diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 453dcd83df1..15466c2978f 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -290,8 +290,7 @@ multiclass fma3s_int_forms opc132, bits<8> opc213, bits<8> opc231, } multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, - string OpStr, Intrinsic IntF32, Intrinsic IntF64, - SDNode OpNode> { + string OpStr, SDNode OpNodeIntrin, SDNode OpNode> { let ExeDomain = SSEPackedSingle in defm NAME : fma3s_forms, @@ -309,43 +308,44 @@ multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, // This is because src1 is tied to dest, and the scalar intrinsics // require the pass-through values to come from the first source // operand, not the second. - // TODO: Use AVX512 instructions when possible. - let Predicates = [HasFMA] in { - def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3), + let Predicates = [HasFMA, NoAVX512] in { + def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)), (!cast(NAME#"213SSr_Int") VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3), + def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)), (!cast(NAME#"213SDr_Int") VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(IntF32 VR128:$src1, VR128:$src2, sse_load_f32:$src3), + def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2, + sse_load_f32:$src3)), (!cast(NAME#"213SSm_Int") VR128:$src1, VR128:$src2, sse_load_f32:$src3)>; - def : Pat<(IntF64 VR128:$src1, VR128:$src2, sse_load_f64:$src3), + def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2, + sse_load_f64:$src3)), (!cast(NAME#"213SDm_Int") VR128:$src1, VR128:$src2, sse_load_f64:$src3)>; - def : Pat<(IntF32 VR128:$src1, sse_load_f32:$src3, VR128:$src2), + def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, sse_load_f32:$src3, + VR128:$src2)), (!cast(NAME#"132SSm_Int") VR128:$src1, VR128:$src2, sse_load_f32:$src3)>; - def : Pat<(IntF64 VR128:$src1, sse_load_f64:$src3, VR128:$src2), + def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, sse_load_f64:$src3, + VR128:$src2)), (!cast(NAME#"132SDm_Int") VR128:$src1, VR128:$src2, sse_load_f64:$src3)>; } } -defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, - int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG; -defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, - int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG; +defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadds1, X86Fmadd>, VEX_LIG; +defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsubs1, X86Fmsub>, VEX_LIG; -defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, - int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG; -defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, - int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG; +defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadds1, X86Fnmadd>, + VEX_LIG; +defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsubs1, X86Fnmsub>, + VEX_LIG; //===----------------------------------------------------------------------===// @@ -385,26 +385,28 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in } multiclass fma4s_int opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic Int> { + ValueType VT, ComplexPattern mem_cpat, SDNode OpNode> { let isCodeGenOnly = 1 in { def rr_Int : FMA4, VEX_W, VEX_LIG; + (VT (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, VEX_W, + VEX_LIG; def rm_Int : FMA4, VEX_W, VEX_LIG; + [(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2, + mem_cpat:$src3)))]>, VEX_W, VEX_LIG; def mr_Int : FMA4, VEX_LIG; + (VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>, + VEX_LIG; let hasSideEffects = 0 in def rr_Int_REV : FMA4, - fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma_vfmadd_ss>; + fma4s_int<0x6A, "vfmaddss", ssmem, v4f32, sse_load_f32, + X86Fmadds1>; defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, - fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma_vfmsub_ss>; + fma4s_int<0x6E, "vfmsubss", ssmem, v4f32, sse_load_f32, + X86Fmsubs1>; defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, X86Fnmadd, loadf32>, - fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, - int_x86_fma_vfnmadd_ss>; + fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32, sse_load_f32, + X86Fnmadds1>; defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, X86Fnmsub, loadf32>, - fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, - int_x86_fma_vfnmsub_ss>; + fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32, sse_load_f32, + X86Fnmsubs1>; // Packed Instructions defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, loadv4f32, loadv8f32>; @@ -506,19 +508,19 @@ let ExeDomain = SSEPackedSingle in { let ExeDomain = SSEPackedDouble in { // Scalar Instructions defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, - fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfmadd_sd>; + fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64, sse_load_f64, + X86Fmadds1>; defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, - fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfmsub_sd>; + fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64, sse_load_f64, + X86Fmsubs1>; defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, X86Fnmadd, loadf64>, - fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfnmadd_sd>; + fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64, sse_load_f64, + X86Fnmadds1>; defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, X86Fnmsub, loadf64>, - fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfnmsub_sd>; + fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64, sse_load_f64, + X86Fnmsubs1>; // Packed Instructions defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, loadv2f64, loadv4f64>; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 6132234c006..128d0942a73 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -1468,6 +1468,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, ISD::FMA, 0), + X86_INTRINSIC_DATA(fma_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADDS1, 0), + X86_INTRINSIC_DATA(fma_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADDS1, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), @@ -1476,6 +1478,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(fma_vfmsub_sd, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0), + X86_INTRINSIC_DATA(fma_vfmsub_ss, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0), X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0), @@ -1484,10 +1488,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0), X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0), X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(fma_vfnmadd_sd, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0), + X86_INTRINSIC_DATA(fma_vfnmadd_ss, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0), X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(fma_vfnmsub_sd, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0), + X86_INTRINSIC_DATA(fma_vfnmsub_ss, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0), X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0), X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ), X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE), diff --git a/test/CodeGen/X86/fma-intrinsics-x86.ll b/test/CodeGen/X86/fma-intrinsics-x86.ll index 6ec2f7849f8..362864f72a9 100644 --- a/test/CodeGen/X86/fma-intrinsics-x86.ll +++ b/test/CodeGen/X86/fma-intrinsics-x86.ll @@ -13,7 +13,7 @@ define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2] +; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss: @@ -40,7 +40,7 @@ define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_ss: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xa9,0xca] +; CHECK-AVX512VL-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xca] ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; @@ -68,7 +68,7 @@ define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_sd: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2] +; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd: @@ -95,7 +95,7 @@ define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> % ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_sd: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xa9,0xca] +; CHECK-AVX512VL-NEXT: vfmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xca] ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; @@ -232,7 +232,7 @@ define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ss: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2] +; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss: @@ -259,7 +259,7 @@ define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_ss: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xab,0xca] +; CHECK-AVX512VL-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0xca] ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; @@ -287,7 +287,7 @@ define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_sd: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2] +; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd: @@ -314,7 +314,7 @@ define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> % ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_sd: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xab,0xca] +; CHECK-AVX512VL-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0xca] ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; @@ -451,7 +451,7 @@ define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ss: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2] +; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss: @@ -478,7 +478,7 @@ define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1 ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_ss: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xad,0xca] +; CHECK-AVX512VL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0xca] ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; @@ -506,7 +506,7 @@ define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_sd: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2] +; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_sd: @@ -533,7 +533,7 @@ define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_sd: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xad,0xca] +; CHECK-AVX512VL-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0xca] ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; @@ -670,7 +670,7 @@ define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ss: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2] +; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss: @@ -697,7 +697,7 @@ define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1 ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_ss: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xaf,0xca] +; CHECK-AVX512VL-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0xca] ; CHECK-AVX512VL-NEXT: vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; @@ -725,7 +725,7 @@ define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_sd: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2] +; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaf,0xc2] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd: @@ -752,7 +752,7 @@ define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> ; ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_sd: ; CHECK-AVX512VL: # BB#0: -; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xaf,0xca] +; CHECK-AVX512VL-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0xca] ; CHECK-AVX512VL-NEXT: vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1] ; CHECK-AVX512VL-NEXT: retq # encoding: [0xc3] ; -- 2.50.1