From d5cf0c957e9b64b1bf65f5aa3e866378aec2639b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 8 Feb 2019 18:57:38 +0000 Subject: [PATCH] [TargetLowering] Use ISD::FSHR in expandFixedPointMul Replace OR(SHL,SRL) pattern with ISD::FSHR (legalization expands this later if necessary) - this helps with the scale == 0 'undefined' drop-through case that was discussed on D55720. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@353546 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 7 ++----- test/CodeGen/X86/umul_fix.ll | 12 ++++++------ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index c8b66d05a01..0f343f5989f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5512,9 +5512,6 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { // are scaled. The result is given to us in 2 halves, so we only want part of // both in the result. EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout()); - Lo = DAG.getNode(ISD::SRL, dl, VT, Lo, DAG.getConstant(Scale, dl, ShiftTy)); - Hi = DAG.getNode( - ISD::SHL, dl, VT, Hi, - DAG.getConstant(VT.getScalarSizeInBits() - Scale, dl, ShiftTy)); - return DAG.getNode(ISD::OR, dl, VT, Lo, Hi); + return DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo, + DAG.getConstant(Scale, dl, ShiftTy)); } diff --git a/test/CodeGen/X86/umul_fix.ll b/test/CodeGen/X86/umul_fix.ll index b50a00eaae3..6f7e22245b7 100644 --- a/test/CodeGen/X86/umul_fix.ll +++ b/test/CodeGen/X86/umul_fix.ll @@ -104,16 +104,16 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X64: # %bb.0: ; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] ; X64-NEXT: pmuludq %xmm1, %xmm0 -; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3] ; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; X64-NEXT: pmuludq %xmm2, %xmm1 -; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] ; X64-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; X64-NEXT: pslld $30, %xmm3 -; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; X64-NEXT: psrld $2, %xmm3 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] ; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-NEXT: psrld $2, %xmm0 +; X64-NEXT: pslld $30, %xmm0 ; X64-NEXT: por %xmm3, %xmm0 ; X64-NEXT: retq ; -- 2.40.0