From f9440ffb0613e08bd161d037164c765e11fc563e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 22 Jan 2017 22:45:23 +0000 Subject: [PATCH] [X86][SSE] Add missing X86ISD::ANDNP combines. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292767 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 15 ++++++++ .../X86/clear_upper_vector_element_bits.ll | 10 +---- test/CodeGen/X86/insertelement-zero.ll | 38 +++---------------- 3 files changed, 22 insertions(+), 41 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cb522389765..2b58c02afea 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -32996,6 +32996,20 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax); } +/// Do target-specific dag combines on X86ISD::ANDNP nodes. +static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + // ANDNP(0, x) -> x + if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) + return N->getOperand(1); + + // ANDNP(x, 0) -> 0 + if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode())) + return getZeroVector(N->getSimpleValueType(0), Subtarget, DAG, SDLoc(N)); + + return SDValue(); +} + static SDValue combineBT(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { // BT ignores high bits in the bit index operand. @@ -34062,6 +34076,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); case ISD::FNEG: return combineFneg(N, DAG, Subtarget); case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); + case X86ISD::ANDNP: return combineAndnp(N, DAG, Subtarget); case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget); case X86ISD::FXOR: diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll index 6a4d83d392d..55ff7a2abe5 100644 --- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -393,15 +393,7 @@ define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind { ; SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] ; SSE-NEXT: pandn %xmm1, %xmm2 ; SSE-NEXT: por %xmm2, %xmm0 -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: pxor %xmm2, %xmm2 -; SSE-NEXT: pandn %xmm2, %xmm1 -; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; SSE-NEXT: pand %xmm1, %xmm0 -; SSE-NEXT: pandn %xmm2, %xmm1 -; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: _clearupper8xi16b: diff --git a/test/CodeGen/X86/insertelement-zero.ll b/test/CodeGen/X86/insertelement-zero.ll index 13414a57f58..3b1a56ced30 100644 --- a/test/CodeGen/X86/insertelement-zero.ll +++ b/test/CodeGen/X86/insertelement-zero.ll @@ -449,11 +449,7 @@ define <16 x i8> @insert_v16i8_z123456789ABCDEz(<16 x i8> %a) { ; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: pandn %xmm2, %xmm1 ; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm1 -; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_v16i8_z123456789ABCDEz: @@ -464,11 +460,7 @@ define <16 x i8> @insert_v16i8_z123456789ABCDEz(<16 x i8> %a) { ; SSE3-NEXT: movd %eax, %xmm2 ; SSE3-NEXT: pandn %xmm2, %xmm1 ; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: pxor %xmm2, %xmm2 -; SSE3-NEXT: pandn %xmm2, %xmm1 -; SSE3-NEXT: por %xmm1, %xmm0 +; SSE3-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_v16i8_z123456789ABCDEz: @@ -503,17 +495,8 @@ define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) { ; SSE2-NEXT: movd %eax, %xmm3 ; SSE2-NEXT: pandn %xmm3, %xmm2 ; SSE2-NEXT: por %xmm2, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; SSE2-NEXT: pand %xmm2, %xmm0 -; SSE2-NEXT: pxor %xmm3, %xmm3 -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; SSE2-NEXT: pand %xmm4, %xmm1 -; SSE2-NEXT: pandn %xmm3, %xmm4 -; SSE2-NEXT: por %xmm4, %xmm1 -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: pandn %xmm3, %xmm2 -; SSE2-NEXT: por %xmm2, %xmm0 -; SSE2-NEXT: por %xmm2, %xmm1 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE2-NEXT: andps {{.*}}(%rip), %xmm1 ; SSE2-NEXT: retq ; ; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz: @@ -524,17 +507,8 @@ define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) { ; SSE3-NEXT: movd %eax, %xmm3 ; SSE3-NEXT: pandn %xmm3, %xmm2 ; SSE3-NEXT: por %xmm2, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; SSE3-NEXT: pand %xmm2, %xmm0 -; SSE3-NEXT: pxor %xmm3, %xmm3 -; SSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; SSE3-NEXT: pand %xmm4, %xmm1 -; SSE3-NEXT: pandn %xmm3, %xmm4 -; SSE3-NEXT: por %xmm4, %xmm1 -; SSE3-NEXT: pand %xmm2, %xmm1 -; SSE3-NEXT: pandn %xmm3, %xmm2 -; SSE3-NEXT: por %xmm2, %xmm0 -; SSE3-NEXT: por %xmm2, %xmm1 +; SSE3-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE3-NEXT: andps {{.*}}(%rip), %xmm1 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz: -- 2.40.0