From f2c20cc531e80a0a3c16e742170e9086fa2e4dbc Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 11 Oct 2019 14:17:56 +0000 Subject: [PATCH] [DAGCombiner] fold vselect-of-constants to shift The diffs suggest that we are missing some more basic analysis/transforms, but this keeps the vector path in sync with the scalar (rL374397). This is again a preliminary step for introducing the reverse transform in IR as proposed in D63382. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374555 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 +++++++ test/CodeGen/X86/selectcc-to-shiftand.ll | 10 +++----- test/CodeGen/X86/vselect.ll | 31 ++++++++---------------- 3 files changed, 22 insertions(+), 28 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 41303921d87..7fa95ce5cf9 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8614,6 +8614,15 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); } + // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C) + APInt Pow2C; + if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() && + isNullOrNullSplat(N2)) { + SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); + } + // The general case for select-of-constants: // vselect Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 // ...but that only makes sense if a vselect is slower than 2 logic ops, so diff --git a/test/CodeGen/X86/selectcc-to-shiftand.ll b/test/CodeGen/X86/selectcc-to-shiftand.ll index 6cc41fd0cf8..8e8e1e806f4 100644 --- a/test/CodeGen/X86/selectcc-to-shiftand.ll +++ b/test/CodeGen/X86/selectcc-to-shiftand.ll @@ -213,9 +213,8 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) { define <8 x i16> 
@sel_shift_bool_v8i16(<8 x i1> %t) { ; ANY-LABEL: sel_shift_bool_v8i16: ; ANY: # %bb.0: -; ANY-NEXT: psllw $15, %xmm0 -; ANY-NEXT: psraw $15, %xmm0 ; ANY-NEXT: pand {{.*}}(%rip), %xmm0 +; ANY-NEXT: psllw $7, %xmm0 ; ANY-NEXT: retq %shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer ret <8 x i16> %shl @@ -224,9 +223,8 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) { define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) { ; ANY-LABEL: sel_shift_bool_v4i32: ; ANY: # %bb.0: -; ANY-NEXT: pslld $31, %xmm0 -; ANY-NEXT: psrad $31, %xmm0 ; ANY-NEXT: pand {{.*}}(%rip), %xmm0 +; ANY-NEXT: pslld $6, %xmm0 ; ANY-NEXT: retq %shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer ret <4 x i32> %shl @@ -235,10 +233,8 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) { define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) { ; ANY-LABEL: sel_shift_bool_v2i64: ; ANY: # %bb.0: -; ANY-NEXT: psllq $63, %xmm0 -; ANY-NEXT: psrad $31, %xmm0 -; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; ANY-NEXT: pand {{.*}}(%rip), %xmm0 +; ANY-NEXT: psllq $16, %xmm0 ; ANY-NEXT: retq %shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer ret <2 x i64> %shl diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll index d3e2b5477ac..22166021b4e 100644 --- a/test/CodeGen/X86/vselect.ll +++ b/test/CodeGen/X86/vselect.ll @@ -647,33 +647,22 @@ define void @vselect_allzeros_LHS_multiple_use_setcc(<4 x i32> %x, <4 x i32> %y, ; This test case previously crashed after r363802, r363850, and r363856 due ; any_extend_vector_inreg not being handled by the X86 backend. 
define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) { -; SSE2-LABEL: vselect_any_extend_vector_inreg_crash: -; SSE2: # %bb.0: -; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; SSE2-NEXT: psrad $24, %xmm0 -; SSE2-NEXT: movq %xmm0, %rax -; SSE2-NEXT: andl $32768, %eax # imm = 0x8000 -; SSE2-NEXT: retq -; -; SSE41-LABEL: vselect_any_extend_vector_inreg_crash: -; SSE41: # %bb.0: -; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; SSE41-NEXT: pcmpeqb {{.*}}(%rip), %xmm0 -; SSE41-NEXT: pmovsxbq %xmm0, %xmm0 -; SSE41-NEXT: movq %xmm0, %rax -; SSE41-NEXT: andl $32768, %eax # imm = 0x8000 -; SSE41-NEXT: retq +; SSE-LABEL: vselect_any_extend_vector_inreg_crash: +; SSE: # %bb.0: +; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; SSE-NEXT: pcmpeqb {{.*}}(%rip), %xmm0 +; SSE-NEXT: movq %xmm0, %rax +; SSE-NEXT: andl $1, %eax +; SSE-NEXT: shlq $15, %rax +; SSE-NEXT: retq ; ; AVX-LABEL: vselect_any_extend_vector_inreg_crash: ; AVX: # %bb.0: ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 ; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: andl $32768, %eax # imm = 0x8000 +; AVX-NEXT: andl $1, %eax +; AVX-NEXT: shlq $15, %rax ; AVX-NEXT: retq 0: %1 = load <8 x i8>, <8 x i8>* %x -- 2.40.0