From 3e593b0e11aaf1a8d9adf3d390ff3a25b90d400a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 3 Jun 2019 19:29:14 +0000 Subject: [PATCH] [X86] Fix the pattern for merge masked vcvtps2pd. r362199 fixed it for zero masking, but not zero masking. The load folding in the peephole pass hid the bug. This patch turns off the peephole pass on the relevant test to ensure coverage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362440 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 5 +--- test/CodeGen/X86/avx512-cvt.ll | 43 +++++++++++++++++++++++++------- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 753f1b71b07..eebb6401db0 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -7629,10 +7629,7 @@ multiclass avx512_vcvt_fp opc, string OpcodeStr, X86VectorVTInfo _, (ins MaskRC:$mask, MemOp:$src), OpcodeStr#Alias, "$src", "$src", LdDAG, - (vselect MaskRC:$mask, - (_.VT (OpNode (_Src.VT - (_Src.LdFrag addr:$src)))), - _.RC:$src0), + (vselect MaskRC:$mask, LdDAG, _.RC:$src0), vselect, "$src0 = $dst">, EVEX, Sched<[sched.Folded]>; diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index c42be0d0f1c..2c8978d4a01 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=DQNOVL --check-prefix=AVX512DQ +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ +; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW define <16 x float> @sitof32(<16 x i32> %a) nounwind { @@ -786,9 +786,34 @@ define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x doubl ret <4 x double> %c } -define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) { +define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1, <4 x double> %passthru) { ; NOVL-LABEL: f32to4f64_mask_load: ; NOVL: # %bb.0: +; NOVL-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 +; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; NOVL-NEXT: vcvtps2pd (%rdi), %ymm3 +; NOVL-NEXT: vcmpltpd %zmm1, %zmm0, %k1 +; NOVL-NEXT: vblendmpd %zmm3, %zmm2, %zmm0 {%k1} +; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; NOVL-NEXT: retq +; +; VL-LABEL: f32to4f64_mask_load: +; VL: # %bb.0: +; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1 +; VL-NEXT: vcvtps2pd (%rdi), %ymm2 {%k1} +; VL-NEXT: vmovaps %ymm2, %ymm0 +; VL-NEXT: retq + %b = load <4 x float>, <4 x float>* %p + %a = fpext <4 x float> %b to <4 x double> + %mask = fcmp ogt <4 x double> %a1, %b1 + %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> %passthru + ret <4 x double> %c +} + +define <4 x double> @f32to4f64_maskz_load(<4 x float>* %p, <4 x double> %b1, <4 x double> %a1) { +; NOVL-LABEL: f32to4f64_maskz_load: +; NOVL: # %bb.0: ; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NOVL-NEXT: vcvtps2pd (%rdi), %ymm2 @@ -797,7 +822,7 @@ define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x ; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NOVL-NEXT: retq ; -; VL-LABEL: f32to4f64_mask_load: +; VL-LABEL: f32to4f64_maskz_load: ; VL: # %bb.0: ; VL-NEXT: vcmpltpd %ymm1, %ymm0, %k1 ; VL-NEXT: vcvtps2pd (%rdi), %ymm0 {%k1} {z} -- 2.50.1