[X86] Add patterns to use VMOVSS/SD zero masking for scalar f32/f64 select with zero.

author Craig Topper <craig.topper@intel.com>
Thu, 12 Jul 2018 00:54:40 +0000 (00:54 +0000)
committer Craig Topper <craig.topper@intel.com>
Thu, 12 Jul 2018 00:54:40 +0000 (00:54 +0000)
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td

index 57899034bd6371442e549d003c8254de7e38f2a0..615de67aadb148d972c2a51ebdd6c77956472777 100644 (file)
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -4237,11 +4237,19 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
             VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
             (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
  
+def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
+          (COPY_TO_REGCLASS (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
+           (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
+
  def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
            (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
             VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
             (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
  
+def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
+          (COPY_TO_REGCLASS (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
+           (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
+
  let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
    def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                             (ins VR128X:$src1, VR128X:$src2),
diff --git a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll

index a37129aaf69c6c1ad3ca9d586a860c719418a273..55049ead617e816674ebb0b32506ed29f4a52831 100644 (file)
--- a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -9083,9 +9083,8 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
  ; X86-NEXT:    vfmadd231ss (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x01]
  ; X86-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
  ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; X86-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc8]
-; X86-NEXT:    vmovss %xmm1, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x0a]
+; X86-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
+; X86-NEXT:    vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
  ; X86-NEXT:    retl ## encoding: [0xc3]
  ;
  ; X64-LABEL: fmadd_ss_maskz_memfold:
@@ -9095,9 +9094,8 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
  ; X64-NEXT:    vfmadd231ss (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
  ; X64-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
  ; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; X64-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
-; X64-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc8]
-; X64-NEXT:    vmovss %xmm1, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x0f]
+; X64-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
+; X64-NEXT:    vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %a.val = load float, float* %a
    %av0 = insertelement <4 x float> undef, float %a.val, i32 0
@@ -9173,9 +9171,8 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
  ; X86-NEXT:    vfmadd231sd (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x01]
  ; X86-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
  ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
-; X86-NEXT:    vmovsd %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc8]
-; X86-NEXT:    vmovsd %xmm1, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x0a]
+; X86-NEXT:    vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
+; X86-NEXT:    vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
  ; X86-NEXT:    retl ## encoding: [0xc3]
  ;
  ; X64-LABEL: fmadd_sd_maskz_memfold:
@@ -9185,9 +9182,8 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
  ; X64-NEXT:    vfmadd231sd (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
  ; X64-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
  ; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; X64-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
-; X64-NEXT:    vmovsd %xmm0, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc8]
-; X64-NEXT:    vmovsd %xmm1, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x0f]
+; X64-NEXT:    vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
+; X64-NEXT:    vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
  ; X64-NEXT:    retq ## encoding: [0xc3]
    %a.val = load double, double* %a
    %av0 = insertelement <2 x double> undef, double %a.val, i32 0
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll

index c538972266b034b4cb6623e785411bc775f9b1b6..1b3066086b07b363d6c9a09e303d3c279d5b27bd 100644 (file)
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -4629,9 +4629,8 @@ define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
  ; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
  ; CHECK-NEXT:    vfmadd231ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
  ; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vmovss %xmm0, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vmovss %xmm1, (%rdi)
+; CHECK-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vmovss %xmm0, (%rdi)
  ; CHECK-NEXT:    retq
    %a.val = load float, float* %a
    %av0 = insertelement <4 x float> undef, float %a.val, i32 0
@@ -4693,9 +4692,8 @@ define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
  ; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
  ; CHECK-NEXT:    vfmadd231sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
  ; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vmovsd %xmm0, %xmm0, %xmm1 {%k1}
-; CHECK-NEXT:    vmovsd %xmm1, (%rdi)
+; CHECK-NEXT:    vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vmovsd %xmm0, (%rdi)
  ; CHECK-NEXT:    retq
    %a.val = load double, double* %a
    %av0 = insertelement <2 x double> undef, double %a.val, i32 0
author	Craig Topper <craig.topper@intel.com>
	Thu, 12 Jul 2018 00:54:40 +0000 (00:54 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Thu, 12 Jul 2018 00:54:40 +0000 (00:54 +0000)
lib/Target/X86/X86InstrAVX512.td		patch | blob | history
test/CodeGen/X86/avx512-intrinsics-upgrade.ll		patch | blob | history
test/CodeGen/X86/avx512-intrinsics.ll		patch | blob | history