From 6f30b9797e71585fda697a6d6f5701dd2c7a66d6 Mon Sep 17 00:00:00 2001 From: Ayman Musa Date: Thu, 23 Feb 2017 13:15:44 +0000 Subject: [PATCH] [X86][AVX] Disable VCVTSS2SD & VCVTSD2SS memory folding and fix the register class of their first input when creating node in fast-isel. (Quick fix to buildbot failure after rL295940 commit). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295970 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FastISel.cpp | 9 +++++++-- lib/Target/X86/X86InstrInfo.cpp | 4 ---- test/CodeGen/X86/avx-cvt.ll | 3 ++- test/CodeGen/X86/stack-folding-fp-avx1.ll | 22 ---------------------- 4 files changed, 9 insertions(+), 29 deletions(-) diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index c890fdd1e51..278d9630745 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -2427,8 +2427,13 @@ bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I, MachineInstrBuilder MIB; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc), ResultReg); - if (Subtarget->hasAVX()) - MIB.addReg(OpReg); + if (Subtarget->hasAVX()) { + unsigned ImplicitDefReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); + + MIB.addReg(ImplicitDefReg); + } MIB.addReg(OpReg); updateValueMap(I, ResultReg); return true; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 288597744c2..383af1a8e64 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1385,8 +1385,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PMULHRWrr, X86::PMULHRWrm, 0 }, // AVX 128-bit versions of foldable instructions - { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 }, - { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, TB_NO_REVERSE }, { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 }, { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 }, { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 }, @@ -1395,8 +1393,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, - { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 }, - { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, TB_NO_REVERSE }, { X86::VADDPDrr, X86::VADDPDrm, 0 }, { X86::VADDPSrr, X86::VADDPSrm, 0 }, { X86::VADDSDrr, X86::VADDSDrm, 0 }, diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll index a7cd8cf2398..f2900dba938 100644 --- a/test/CodeGen/X86/avx-cvt.ll +++ b/test/CodeGen/X86/avx-cvt.ll @@ -136,7 +136,8 @@ define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp { define void @fpext() nounwind uwtable { ; CHECK-LABEL: fpext: ; CHECK: # BB#0: -; CHECK-NEXT: vcvtss2sd -{{[0-9]+}}(%rsp), %xmm0, %xmm0 +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: retq %f = alloca float, align 4 diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll index 5e939cc034d..72542f49908 100644 --- a/test/CodeGen/X86/stack-folding-fp-avx1.ll +++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll @@ -575,17 +575,6 @@ define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) { } declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone -; TODO stack_fold_cvtsd2ss - -define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) { - ;CHECK-LABEL: stack_fold_cvtsd2ss_int - ;CHECK: vcvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload - %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> , <2 x double> %a0) - ret <4 x float> %2 -} -declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone - define double @stack_fold_cvtsi2sd(i32 %a0) { ;CHECK-LABEL: stack_fold_cvtsi2sd ;CHECK: vcvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload @@ -654,17 +643,6 @@ define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) { } declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone -; TODO stack_fold_cvtss2sd - -define <2 x double> @stack_fold_cvtss2sd_int(<4 x float> %a0) { - ;CHECK-LABEL: stack_fold_cvtss2sd_int - ;CHECK: vcvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload - %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> , <4 x float> %a0) - ret <2 x double> %2 -} -declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone - ; TODO stack_fold_cvtss2si define i32 @stack_fold_cvtss2si_int(<4 x float> %a0) { -- 2.40.0