From: Simon Pilgrim
Date: Sat, 30 Sep 2017 17:57:34 +0000 (+0000)
Subject: [X86][SSE] Fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=206263fa1eedd9a578f6ab0bf1719b838e7d64df;p=llvm

[X86][SSE] Fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1

Remove the sign-extend-in-register style pattern if the sign is already
extended enough.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314599 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b927178265a..2d7cf5c1b98 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -31845,6 +31845,15 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
       N0.getOpcode() == X86ISD::VSRAI)
     return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1);
 
+  // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1
+  if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSHLI &&
+      N1 == N0.getOperand(1)) {
+    SDValue N00 = N0.getOperand(0);
+    unsigned NumSignBits = DAG.ComputeNumSignBits(N00);
+    if (ShiftVal.ult(NumSignBits))
+      return N00;
+  }
+
   // We can decode 'whole byte' logical bit shifts as shuffles.
   if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) {
     SDValue Op(N, 0);
diff --git a/test/CodeGen/X86/vector-trunc.ll b/test/CodeGen/X86/vector-trunc.ll
index 5548ee10a57..bb8fd19bfe6 100644
--- a/test/CodeGen/X86/vector-trunc.ll
+++ b/test/CodeGen/X86/vector-trunc.ll
@@ -389,11 +389,7 @@ entry:
 define <8 x i16> @trunc8i32_8i16_ashr(<8 x i32> %a) {
 ; SSE2-LABEL: trunc8i32_8i16_ashr:
 ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    psrad $16, %xmm0
-; SSE2-NEXT:    psrad $16, %xmm1
-; SSE2-NEXT:    pslld $16, %xmm1
 ; SSE2-NEXT:    psrad $16, %xmm1
-; SSE2-NEXT:    pslld $16, %xmm0
 ; SSE2-NEXT:    psrad $16, %xmm0
 ; SSE2-NEXT:    packssdw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
@@ -727,40 +723,24 @@ entry:
 define void @trunc16i32_16i16_ashr(<16 x i32> %a) {
 ; SSE2-LABEL: trunc16i32_16i16_ashr:
 ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    psrad $16, %xmm2
 ; SSE2-NEXT:    psrad $16, %xmm3
-; SSE2-NEXT:    psrad $16, %xmm0
-; SSE2-NEXT:    psrad $16, %xmm1
-; SSE2-NEXT:    pslld $16, %xmm1
+; SSE2-NEXT:    psrad $16, %xmm2
+; SSE2-NEXT:    packssdw %xmm3, %xmm2
 ; SSE2-NEXT:    psrad $16, %xmm1
-; SSE2-NEXT:    pslld $16, %xmm0
 ; SSE2-NEXT:    psrad $16, %xmm0
 ; SSE2-NEXT:    packssdw %xmm1, %xmm0
-; SSE2-NEXT:    pslld $16, %xmm3
-; SSE2-NEXT:    psrad $16, %xmm3
-; SSE2-NEXT:    pslld $16, %xmm2
-; SSE2-NEXT:    psrad $16, %xmm2
-; SSE2-NEXT:    packssdw %xmm3, %xmm2
 ; SSE2-NEXT:    movdqu %xmm2, (%rax)
 ; SSE2-NEXT:    movdqu %xmm0, (%rax)
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: trunc16i32_16i16_ashr:
 ; SSSE3:       # BB#0: # %entry
-; SSSE3-NEXT:    psrad $16, %xmm2
 ; SSSE3-NEXT:    psrad $16, %xmm3
-; SSSE3-NEXT:    psrad $16, %xmm0
-; SSSE3-NEXT:    psrad $16, %xmm1
-; SSSE3-NEXT:    pslld $16, %xmm1
+; SSSE3-NEXT:    psrad $16, %xmm2
+; SSSE3-NEXT:    packssdw %xmm3, %xmm2
 ; SSSE3-NEXT:    psrad $16, %xmm1
-; SSSE3-NEXT:    pslld $16, %xmm0
 ; SSSE3-NEXT:    psrad $16, %xmm0
 ; SSSE3-NEXT:    packssdw %xmm1, %xmm0
-; SSSE3-NEXT:    pslld $16, %xmm3
-; SSSE3-NEXT:    psrad $16, %xmm3
-; SSSE3-NEXT:    pslld $16, %xmm2
-; SSSE3-NEXT:    psrad $16, %xmm2
-; SSSE3-NEXT:    packssdw %xmm3, %xmm2
 ; SSSE3-NEXT:    movdqu %xmm2, (%rax)
 ; SSSE3-NEXT:    movdqu %xmm0, (%rax)
 ; SSSE3-NEXT:    retq
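
Why the fold is sound: NumSignBits(X) > C1 means the top C1+1 bits of X
are all copies of the sign bit, so the left shift by C1 discards only
redundant sign-bit copies and the arithmetic right shift restores them,
leaving X unchanged. In the tests above, X is itself the result of a
psrad $16, so each lane carries at least 17 sign bits and the
pslld $16 / psrad $16 pair folds away. Below is a minimal standalone
C++ sketch of that argument on a 32-bit scalar; it is an illustration
only, not part of the commit, and numSignBits is a hypothetical scalar
stand-in for SelectionDAG::ComputeNumSignBits.

// Standalone sketch (assumes C++20 two's-complement shift semantics):
// if x has more than C1 sign bits, then ashr(shl(x, C1), C1) == x,
// which is the scalar form of the DAG fold above.
#include <cassert>
#include <cstdint>

// Hypothetical scalar analogue of ComputeNumSignBits: the number of
// leading bits equal to (and including) the sign bit.
static unsigned numSignBits(int32_t x) {
  unsigned N = 1;
  for (int i = 30; i >= 0 && ((x >> i) & 1) == ((x >> 31) & 1); --i)
    ++N;
  return N;
}

int main() {
  const unsigned C1 = 16;
  int32_t x = -32767;                  // 0xFFFF8001: 17 sign bits
  assert(numSignBits(x) > C1);         // the fold's precondition
  int32_t y = int32_t(uint32_t(x) << C1) >> C1; // shl C1, then ashr C1
  assert(y == x);                      // the shl/ashr pair is a no-op
  return 0;
}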