From: Craig Topper Date: Mon, 3 Jun 2019 19:29:14 +0000 (+0000) Subject: [X86] Fix the pattern for merge masked vcvtps2pd. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3e593b0e11aaf1a8d9adf3d390ff3a25b90d400a;p=llvm [X86] Fix the pattern for merge masked vcvtps2pd. r362199 fixed it for zero masking, but not merge masking. The load folding in the peephole pass hid the bug. This patch turns off the peephole pass on the relevant test to ensure coverage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362440 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 753f1b71b07..eebb6401db0 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -7629,10 +7629,7 @@ multiclass avx512_vcvt_fp opc, string OpcodeStr, X86VectorVTInfo _, (ins MaskRC:$mask, MemOp:$src), OpcodeStr#Alias, "$src", "$src", LdDAG, - (vselect MaskRC:$mask, - (_.VT (OpNode (_Src.VT - (_Src.LdFrag addr:$src)))), - _.RC:$src0), + (vselect MaskRC:$mask, LdDAG, _.RC:$src0), vselect, "$src0 = $dst">, EVEX, Sched<[sched.Folded]>; diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index c42be0d0f1c..2c8978d4a01 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW 
--check-prefix=AVX512VL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW 
--check-prefix=AVX512VLDQ +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW define <16 x float> @sitof32(<16 x i32> %a) nounwind { @@ -786,9 +786,34 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x doubl ret <4 x double> %c } -define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) { +define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1, <4 x double> %passthru) { ; NOVL-LABEL: f32to4f64_mask_load: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; NOVL-NEXT: vcvtps2pd (%rdi), %ymm3 +; NOVL-NEXT: vcmpltpd %zmm1, %zmm0, %k1 +; NOVL-NEXT: vblendmpd %zmm3, %zmm2, %zmm0 {%k1} +; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; NOVL-NEXT: retq +; +; VL-LABEL: f32to4f64_mask_load: +; VL: # %bb.0: +; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1 +; VL-NEXT: vcvtps2pd (%rdi), %ymm2 {%k1} +; VL-NEXT: vmovaps %ymm2, %ymm0 +; VL-NEXT: retq + %b = load <4 x float>, <4 x float>* %p + %a = fpext <4 x float> %b to <4 x double> + %mask = fcmp ogt <4 x double> %a1, %b1 + %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> %passthru + ret <4 x double> %c +} + +define <4 x double> @f32to4f64_maskz_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) { +; NOVL-LABEL: f32to4f64_maskz_load: +; NOVL: # %bb.0: ; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NOVL-NEXT: vcvtps2pd (%rdi), %ymm2 @@ -797,7 +822,7 @@ define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x ; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NOVL-NEXT: retq ; -; VL-LABEL: f32to4f64_mask_load: +; 
VL-LABEL: f32to4f64_maskz_load: ; VL: # %bb.0: ; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1 ; VL-NEXT: vcvtps2pd (%rdi), %ymm0 {%k1} {z}