[DAGCombiner] fold vselect-of-constants to shift

author Sanjay Patel <spatel@rotateright.com>

Fri, 11 Oct 2019 14:17:56 +0000 (14:17 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Fri, 11 Oct 2019 14:17:56 +0000 (14:17 +0000)
author Sanjay Patel <spatel@rotateright.com>
Fri, 11 Oct 2019 14:17:56 +0000 (14:17 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Fri, 11 Oct 2019 14:17:56 +0000 (14:17 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 41303921d87a6109aada9e5d21bec2aab017f12e..7fa95ce5cf9bc77732c647d147affaf56b045dc7 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8614,6 +8614,15 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
      return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
    }
  
+  // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
+  APInt Pow2C;
+  if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
+      isNullOrNullSplat(N2)) {
+    SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
+    SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
+    return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
+  }
+
    // The general case for select-of-constants:
    // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
    // ...but that only makes sense if a vselect is slower than 2 logic ops, so
diff --git a/test/CodeGen/X86/selectcc-to-shiftand.ll b/test/CodeGen/X86/selectcc-to-shiftand.ll

index 6cc41fd0cf874776e0c4377ade66cfbe54d39ca1..8e8e1e806f43ed68cbb3b7ee512c35166f2511e5 100644 (file)
--- a/test/CodeGen/X86/selectcc-to-shiftand.ll
+++ b/test/CodeGen/X86/selectcc-to-shiftand.ll
@@ -213,9 +213,8 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) {
  define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
  ; ANY-LABEL: sel_shift_bool_v8i16:
  ; ANY:       # %bb.0:
-; ANY-NEXT:    psllw $15, %xmm0
-; ANY-NEXT:    psraw $15, %xmm0
  ; ANY-NEXT:    pand {{.*}}(%rip), %xmm0
+; ANY-NEXT:    psllw $7, %xmm0
  ; ANY-NEXT:    retq
    %shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
    ret <8 x i16> %shl
@@ -224,9 +223,8 @@ define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
  define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
  ; ANY-LABEL: sel_shift_bool_v4i32:
  ; ANY:       # %bb.0:
-; ANY-NEXT:    pslld $31, %xmm0
-; ANY-NEXT:    psrad $31, %xmm0
  ; ANY-NEXT:    pand {{.*}}(%rip), %xmm0
+; ANY-NEXT:    pslld $6, %xmm0
  ; ANY-NEXT:    retq
    %shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
    ret <4 x i32> %shl
@@ -235,10 +233,8 @@ define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
  define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) {
  ; ANY-LABEL: sel_shift_bool_v2i64:
  ; ANY:       # %bb.0:
-; ANY-NEXT:    psllq $63, %xmm0
-; ANY-NEXT:    psrad $31, %xmm0
-; ANY-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
  ; ANY-NEXT:    pand {{.*}}(%rip), %xmm0
+; ANY-NEXT:    psllq $16, %xmm0
  ; ANY-NEXT:    retq
    %shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer
    ret <2 x i64> %shl
diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll

index d3e2b5477ac0c480a4c87d2a0b778e1f59edb050..22166021b4e76afe97078c4806c976ed4e4b09f5 100644 (file)
--- a/test/CodeGen/X86/vselect.ll
+++ b/test/CodeGen/X86/vselect.ll
@@ -647,33 +647,22 @@ define void @vselect_allzeros_LHS_multiple_use_setcc(<4 x i32> %x, <4 x i32> %y,
  ; This test case previously crashed after r363802, r363850, and r363856 due to
  ; any_extend_vector_inreg not being handled by the X86 backend.
  define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) {
-; SSE2-LABEL: vselect_any_extend_vector_inreg_crash:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    psrad $24, %xmm0
-; SSE2-NEXT:    movq %xmm0, %rax
-; SSE2-NEXT:    andl $32768, %eax # imm = 0x8000
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: vselect_any_extend_vector_inreg_crash:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; SSE41-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
-; SSE41-NEXT:    pmovsxbq %xmm0, %xmm0
-; SSE41-NEXT:    movq %xmm0, %rax
-; SSE41-NEXT:    andl $32768, %eax # imm = 0x8000
-; SSE41-NEXT:    retq
+; SSE-LABEL: vselect_any_extend_vector_inreg_crash:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    pcmpeqb {{.*}}(%rip), %xmm0
+; SSE-NEXT:    movq %xmm0, %rax
+; SSE-NEXT:    andl $1, %eax
+; SSE-NEXT:    shlq $15, %rax
+; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
  ; AVX:       # %bb.0:
  ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
  ; AVX-NEXT:    vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT:    vpmovsxbq %xmm0, %xmm0
  ; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    andl $32768, %eax # imm = 0x8000
+; AVX-NEXT:    andl $1, %eax
+; AVX-NEXT:    shlq $15, %rax
  ; AVX-NEXT:    retq
  0:
    %1 = load <8 x i8>, <8 x i8>* %x
author	Sanjay Patel <spatel@rotateright.com>
	Fri, 11 Oct 2019 14:17:56 +0000 (14:17 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Fri, 11 Oct 2019 14:17:56 +0000 (14:17 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
test/CodeGen/X86/selectcc-to-shiftand.ll		patch | blob | history
test/CodeGen/X86/vselect.ll		patch | blob | history