From 4316ebd34d9fb264a60d29a4c7cb708434b82c81 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Fri, 17 Jun 2016 16:46:50 +0000
Subject: [PATCH] [InstCombine] allow more than one use for vector bitcast
 folding with selects

The motivating example for this transform is similar to D20774 where bitcasts interfere
with a single cmp/select sequence, but in this case we have 2 uses of each bitcast to
produce min and max ops:

define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) {
  %cmp = fcmp olt <4 x float> %a, %b
  %bc1 = bitcast <4 x float> %a to <4 x i32>
  %bc2 = bitcast <4 x float> %b to <4 x i32>
  %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
  %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
  %bc3 = bitcast <4 x float>* %ptr1 to <4 x i32>*
  store <4 x i32> %sel1, <4 x i32>* %bc3
  %bc4 = bitcast <4 x float>* %ptr2 to <4 x i32>*
  store <4 x i32> %sel2, <4 x i32>* %bc4
  ret void
}

With this patch, we move the selects up to use the input args, which allows getting rid of
all of the bitcasts:

define void @minmax_bc_store(<4 x float> %a, <4 x float> %b, <4 x float>* %ptr1, <4 x float>* %ptr2) {
  %cmp = fcmp olt <4 x float> %a, %b
  %sel1.v = select <4 x i1> %cmp, <4 x float> %a, <4 x float> %b
  %sel2.v = select <4 x i1> %cmp, <4 x float> %b, <4 x float> %a
  store <4 x float> %sel1.v, <4 x float>* %ptr1, align 16
  store <4 x float> %sel2.v, <4 x float>* %ptr2, align 16
  ret void
}

The asm for x86 SSE then improves from:

movaps  %xmm0, %xmm2
cmpltps %xmm1, %xmm2
movaps  %xmm2, %xmm3
andnps  %xmm1, %xmm3
movaps  %xmm2, %xmm4
andnps  %xmm0, %xmm4
andps %xmm2, %xmm0
orps  %xmm3, %xmm0
andps %xmm1, %xmm2
orps  %xmm4, %xmm2
movaps  %xmm0, (%rdi)
movaps  %xmm2, (%rsi)

To:

movaps  %xmm0, %xmm2
minps %xmm1, %xmm2
maxps %xmm0, %xmm1
movaps  %xmm2, (%rdi)
movaps  %xmm1, (%rsi)

The TODO comments show that we're limiting this transform only to vectors and only to bitcasts
because we need to improve other transforms or risk creating worse codegen.

Differential Revision: http://reviews.llvm.org/D21190


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273011 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineSelect.cpp         |  48 +++--
 test/Transforms/InstCombine/select.ll         | 193 +++++++++++-------
 2 files changed, 155 insertions(+), 86 deletions(-)

diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 6b56bc6ff04..7c3ef99d227 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -116,8 +116,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) {
   }
 }
 
-/// Here we have (select c, TI, FI), and we know that TI and FI
-/// have the same opcode and only one use each.  Try to simplify this.
+/// We have (select c, TI, FI), and we know that TI and FI have the same opcode.
 Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
                                           Instruction *FI) {
   // If this is a cast from the same type, merge.
@@ -129,10 +128,30 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
     // The select condition may be a vector. We may only change the operand
     // type if the vector width remains the same (and matches the condition).
     Type *CondTy = SI.getCondition()->getType();
-    if (CondTy->isVectorTy() &&
-        (!FIOpndTy->isVectorTy() ||
-         CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
+    if (CondTy->isVectorTy()) {
+      if (!FIOpndTy->isVectorTy())
+        return nullptr;
+      if (CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())
+        return nullptr;
+
+      // TODO: If the backend knew how to deal with casts better, we could
+      // remove this limitation. For now, there's too much potential to create
+      // worse codegen by promoting the select ahead of size-altering casts
+      // (PR28160).
+      //
+      // Note that ValueTracking's matchSelectPattern() looks through casts
+      // without checking 'hasOneUse' when it matches min/max patterns, so this
+      // transform may end up happening anyway.
+      if (TI->getOpcode() != Instruction::BitCast &&
+          (!TI->hasOneUse() || !FI->hasOneUse()))
+        return nullptr;
+
+    } else if (!TI->hasOneUse() || !FI->hasOneUse()) {
+      // TODO: The one-use restrictions for a scalar select could be eased if
+      // the fold of a select in visitLoadInst() was enhanced to match a pattern
+      // that includes a cast.
       return nullptr;
+    }
 
     // Fold this by inserting a select from the input values.
     Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
@@ -141,8 +160,13 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
                             TI->getType());
   }
 
-  // Only handle binary operators here.
-  if (!isa<BinaryOperator>(TI))
+  // TODO: This function ends awkwardly in unreachable - fix to be more normal.
+
+  // Only handle binary operators with one use here. As with the cast case
+  // above, it may be possible to relax the one-use constraint, but that needs
+  // to be examined carefully since it may not reduce the total number of
+  // instructions.
+  if (!isa<BinaryOperator>(TI) || !TI->hasOneUse() || !FI->hasOneUse())
     return nullptr;
 
   // Figure out if the operations have any operands in common.
@@ -1056,14 +1080,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   if (Instruction *Add = foldAddSubSelect(SI, *Builder))
     return Add;
 
+  // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
   auto *TI = dyn_cast<Instruction>(TrueVal);
   auto *FI = dyn_cast<Instruction>(FalseVal);
-  if (TI && FI && TI->hasOneUse() && FI->hasOneUse()) {
-    // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
-    if (TI->getOpcode() == FI->getOpcode())
-      if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
-        return IV;
-  }
+  if (TI && FI && TI->getOpcode() == FI->getOpcode())
+    if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+      return IV;
 
   // See if we can fold the select into one of our operands.
   if (SI.getType()->isIntOrIntVectorTy() || SI.getType()->isFPOrFPVectorTy()) {
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 876999b95dc..00b474231be 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -5,14 +5,14 @@
 target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
 
 define i32 @test1(i32 %A, i32 %B) {
-        %C = select i1 false, i32 %A, i32 %B            
+        %C = select i1 false, i32 %A, i32 %B
         ret i32 %C
 ; CHECK-LABEL: @test1(
 ; CHECK: ret i32 %B
 }
 
 define i32 @test2(i32 %A, i32 %B) {
-        %C = select i1 true, i32 %A, i32 %B             
+        %C = select i1 true, i32 %A, i32 %B
         ret i32 %C
 ; CHECK-LABEL: @test2(
 ; CHECK: ret i32 %A
@@ -21,7 +21,7 @@ define i32 @test2(i32 %A, i32 %B) {
 
 define i32 @test3(i1 %C, i32 %I) {
         ; V = I
-        %V = select i1 %C, i32 %I, i32 %I               
+        %V = select i1 %C, i32 %I, i32 %I
         ret i32 %V
 ; CHECK-LABEL: @test3(
 ; CHECK: ret i32 %I
@@ -29,7 +29,7 @@ define i32 @test3(i1 %C, i32 %I) {
 
 define i1 @test4(i1 %C) {
         ; V = C
-        %V = select i1 %C, i1 true, i1 false            
+        %V = select i1 %C, i1 true, i1 false
         ret i1 %V
 ; CHECK-LABEL: @test4(
 ; CHECK: ret i1 %C
@@ -37,16 +37,16 @@ define i1 @test4(i1 %C) {
 
 define i1 @test5(i1 %C) {
         ; V = !C
-        %V = select i1 %C, i1 false, i1 true            
+        %V = select i1 %C, i1 false, i1 true
         ret i1 %V
 ; CHECK-LABEL: @test5(
 ; CHECK: xor i1 %C, true
 ; CHECK: ret i1
 }
 
-define i32 @test6(i1 %C) { 
+define i32 @test6(i1 %C) {
         ; V = cast C to int
-        %V = select i1 %C, i32 1, i32 0         
+        %V = select i1 %C, i32 1, i32 0
         ret i32 %V
 ; CHECK-LABEL: @test6(
 ; CHECK: %V = zext i1 %C to i32
@@ -54,8 +54,8 @@ define i32 @test6(i1 %C) {
 }
 
 define i1 @test7(i1 %C, i1 %X) {
-        ; R = or C, X       
-        %R = select i1 %C, i1 true, i1 %X               
+        ; R = or C, X
+        %R = select i1 %C, i1 true, i1 %X
         ret i1 %R
 ; CHECK-LABEL: @test7(
 ; CHECK: %R = or i1 %C, %X
@@ -64,7 +64,7 @@ define i1 @test7(i1 %C, i1 %X) {
 
 define i1 @test8(i1 %C, i1 %X) {
         ; R = and C, X
-        %R = select i1 %C, i1 %X, i1 false              
+        %R = select i1 %C, i1 %X, i1 false
         ret i1 %R
 ; CHECK-LABEL: @test8(
 ; CHECK: %R = and i1 %C, %X
@@ -73,7 +73,7 @@ define i1 @test8(i1 %C, i1 %X) {
 
 define i1 @test9(i1 %C, i1 %X) {
         ; R = and !C, X
-        %R = select i1 %C, i1 false, i1 %X              
+        %R = select i1 %C, i1 false, i1 %X
         ret i1 %R
 ; CHECK-LABEL: @test9(
 ; CHECK: xor i1 %C, true
@@ -83,7 +83,7 @@ define i1 @test9(i1 %C, i1 %X) {
 
 define i1 @test10(i1 %C, i1 %X) {
         ; R = or !C, X
-        %R = select i1 %C, i1 %X, i1 true               
+        %R = select i1 %C, i1 %X, i1 true
         ret i1 %R
 ; CHECK-LABEL: @test10(
 ; CHECK: xor i1 %C, true
@@ -92,8 +92,8 @@ define i1 @test10(i1 %C, i1 %X) {
 }
 
 define i32 @test11(i32 %a) {
-        %C = icmp eq i32 %a, 0          
-        %R = select i1 %C, i32 0, i32 1         
+        %C = icmp eq i32 %a, 0
+        %R = select i1 %C, i32 0, i32 1
         ret i32 %R
 ; CHECK-LABEL: @test11(
 ; CHECK: icmp ne i32 %a, 0
@@ -102,8 +102,8 @@ define i32 @test11(i32 %a) {
 }
 
 define i32 @test12(i1 %cond, i32 %a) {
-        %b = or i32 %a, 1               
-        %c = select i1 %cond, i32 %b, i32 %a            
+        %b = or i32 %a, 1
+        %c = select i1 %cond, i32 %b, i32 %a
         ret i32 %c
 ; CHECK-LABEL: @test12(
 ; CHECK: %b = zext i1 %cond to i32
@@ -112,8 +112,8 @@ define i32 @test12(i1 %cond, i32 %a) {
 }
 
 define i32 @test12a(i1 %cond, i32 %a) {
-        %b = ashr i32 %a, 1             
-        %c = select i1 %cond, i32 %b, i32 %a            
+        %b = ashr i32 %a, 1
+        %c = select i1 %cond, i32 %b, i32 %a
         ret i32 %c
 ; CHECK-LABEL: @test12a(
 ; CHECK: %b = zext i1 %cond to i32
@@ -122,8 +122,8 @@ define i32 @test12a(i1 %cond, i32 %a) {
 }
 
 define i32 @test12b(i1 %cond, i32 %a) {
-        %b = ashr i32 %a, 1             
-        %c = select i1 %cond, i32 %a, i32 %b            
+        %b = ashr i32 %a, 1
+        %c = select i1 %cond, i32 %a, i32 %b
         ret i32 %c
 ; CHECK-LABEL: @test12b(
 ; CHECK: zext i1 %cond to i32
@@ -133,33 +133,33 @@ define i32 @test12b(i1 %cond, i32 %a) {
 }
 
 define i32 @test13(i32 %a, i32 %b) {
-        %C = icmp eq i32 %a, %b         
-        %V = select i1 %C, i32 %a, i32 %b               
+        %C = icmp eq i32 %a, %b
+        %V = select i1 %C, i32 %a, i32 %b
         ret i32 %V
 ; CHECK-LABEL: @test13(
 ; CHECK: ret i32 %b
 }
 
 define i32 @test13a(i32 %a, i32 %b) {
-        %C = icmp ne i32 %a, %b         
-        %V = select i1 %C, i32 %a, i32 %b               
+        %C = icmp ne i32 %a, %b
+        %V = select i1 %C, i32 %a, i32 %b
         ret i32 %V
 ; CHECK-LABEL: @test13a(
 ; CHECK: ret i32 %a
 }
 
 define i32 @test13b(i32 %a, i32 %b) {
-        %C = icmp eq i32 %a, %b         
-        %V = select i1 %C, i32 %b, i32 %a               
+        %C = icmp eq i32 %a, %b
+        %V = select i1 %C, i32 %b, i32 %a
         ret i32 %V
 ; CHECK-LABEL: @test13b(
 ; CHECK: ret i32 %a
 }
 
 define i1 @test14a(i1 %C, i32 %X) {
-        %V = select i1 %C, i32 %X, i32 0                
+        %V = select i1 %C, i32 %X, i32 0
         ; (X < 1) | !C
-        %R = icmp slt i32 %V, 1         
+        %R = icmp slt i32 %V, 1
         ret i1 %R
 ; CHECK-LABEL: @test14a(
 ; CHECK: icmp slt i32 %X, 1
@@ -169,9 +169,9 @@ define i1 @test14a(i1 %C, i32 %X) {
 }
 
 define i1 @test14b(i1 %C, i32 %X) {
-        %V = select i1 %C, i32 0, i32 %X                
+        %V = select i1 %C, i32 0, i32 %X
         ; (X < 1) | C
-        %R = icmp slt i32 %V, 1         
+        %R = icmp slt i32 %V, 1
         ret i1 %R
 ; CHECK-LABEL: @test14b(
 ; CHECK: icmp slt i32 %X, 1
@@ -181,9 +181,9 @@ define i1 @test14b(i1 %C, i32 %X) {
 
 ;; Code sequence for (X & 16) ? 16 : 0
 define i32 @test15a(i32 %X) {
-        %t1 = and i32 %X, 16            
-        %t2 = icmp eq i32 %t1, 0                
-        %t3 = select i1 %t2, i32 0, i32 16              
+        %t1 = and i32 %X, 16
+        %t2 = icmp eq i32 %t1, 0
+        %t3 = select i1 %t2, i32 0, i32 16
         ret i32 %t3
 ; CHECK-LABEL: @test15a(
 ; CHECK: %t1 = and i32 %X, 16
@@ -192,9 +192,9 @@ define i32 @test15a(i32 %X) {
 
 ;; Code sequence for (X & 32) ? 0 : 24
 define i32 @test15b(i32 %X) {
-        %t1 = and i32 %X, 32            
-        %t2 = icmp eq i32 %t1, 0                
-        %t3 = select i1 %t2, i32 32, i32 0              
+        %t1 = and i32 %X, 32
+        %t2 = icmp eq i32 %t1, 0
+        %t3 = select i1 %t2, i32 32, i32 0
         ret i32 %t3
 ; CHECK-LABEL: @test15b(
 ; CHECK: %t1 = and i32 %X, 32
@@ -204,9 +204,9 @@ define i32 @test15b(i32 %X) {
 
 ;; Alternate code sequence for (X & 16) ? 16 : 0
 define i32 @test15c(i32 %X) {
-        %t1 = and i32 %X, 16            
-        %t2 = icmp eq i32 %t1, 16               
-        %t3 = select i1 %t2, i32 16, i32 0              
+        %t1 = and i32 %X, 16
+        %t2 = icmp eq i32 %t1, 16
+        %t3 = select i1 %t2, i32 16, i32 0
         ret i32 %t3
 ; CHECK-LABEL: @test15c(
 ; CHECK: %t1 = and i32 %X, 16
@@ -215,9 +215,9 @@ define i32 @test15c(i32 %X) {
 
 ;; Alternate code sequence for (X & 16) ? 16 : 0
 define i32 @test15d(i32 %X) {
-        %t1 = and i32 %X, 16            
-        %t2 = icmp ne i32 %t1, 0                
-        %t3 = select i1 %t2, i32 16, i32 0              
+        %t1 = and i32 %X, 16
+        %t2 = icmp ne i32 %t1, 0
+        %t3 = select i1 %t2, i32 16, i32 0
         ret i32 %t3
 ; CHECK-LABEL: @test15d(
 ; CHECK: %t1 = and i32 %X, 16
@@ -300,8 +300,8 @@ define i32 @test15j(i32 %X) {
 }
 
 define i32 @test16(i1 %C, i32* %P) {
-        %P2 = select i1 %C, i32* %P, i32* null          
-        %V = load i32, i32* %P2              
+        %P2 = select i1 %C, i32* %P, i32* null
+        %V = load i32, i32* %P2
         ret i32 %V
 ; CHECK-LABEL: @test16(
 ; CHECK-NEXT: %V = load i32, i32* %P
@@ -329,8 +329,8 @@ define i32 @test16_neg2(i1 %C, i32 addrspace(1)* %P) {
 }
 
 define i1 @test17(i32* %X, i1 %C) {
-        %R = select i1 %C, i32* %X, i32* null           
-        %RV = icmp eq i32* %R, null             
+        %R = select i1 %C, i32* %X, i32* null
+        %RV = icmp eq i32* %R, null
         ret i1 %RV
 ; CHECK-LABEL: @test17(
 ; CHECK: icmp eq i32* %X, null
@@ -340,8 +340,8 @@ define i1 @test17(i32* %X, i1 %C) {
 }
 
 define i32 @test18(i32 %X, i32 %Y, i1 %C) {
-        %R = select i1 %C, i32 %X, i32 0                
-        %V = sdiv i32 %Y, %R            
+        %R = select i1 %C, i32 %X, i32 0
+        %V = sdiv i32 %Y, %R
         ret i32 %V
 ; CHECK-LABEL: @test18(
 ; CHECK: %V = sdiv i32 %Y, %X
@@ -349,45 +349,45 @@ define i32 @test18(i32 %X, i32 %Y, i1 %C) {
 }
 
 define i32 @test19(i32 %x) {
-        %tmp = icmp ugt i32 %x, 2147483647              
-        %retval = select i1 %tmp, i32 -1, i32 0         
+        %tmp = icmp ugt i32 %x, 2147483647
+        %retval = select i1 %tmp, i32 -1, i32 0
         ret i32 %retval
 ; CHECK-LABEL: @test19(
 ; CHECK-NEXT: ashr i32 %x, 31
-; CHECK-NEXT: ret i32 
+; CHECK-NEXT: ret i32
 }
 
 define i32 @test20(i32 %x) {
-        %tmp = icmp slt i32 %x, 0               
-        %retval = select i1 %tmp, i32 -1, i32 0         
+        %tmp = icmp slt i32 %x, 0
+        %retval = select i1 %tmp, i32 -1, i32 0
         ret i32 %retval
 ; CHECK-LABEL: @test20(
 ; CHECK-NEXT: ashr i32 %x, 31
-; CHECK-NEXT: ret i32 
+; CHECK-NEXT: ret i32
 }
 
 define i64 @test21(i32 %x) {
-        %tmp = icmp slt i32 %x, 0               
-        %retval = select i1 %tmp, i64 -1, i64 0         
+        %tmp = icmp slt i32 %x, 0
+        %retval = select i1 %tmp, i64 -1, i64 0
         ret i64 %retval
 ; CHECK-LABEL: @test21(
 ; CHECK-NEXT: ashr i32 %x, 31
-; CHECK-NEXT: sext i32 
+; CHECK-NEXT: sext i32
 ; CHECK-NEXT: ret i64
 }
 
 define i16 @test22(i32 %x) {
-        %tmp = icmp slt i32 %x, 0               
-        %retval = select i1 %tmp, i16 -1, i16 0         
+        %tmp = icmp slt i32 %x, 0
+        %retval = select i1 %tmp, i16 -1, i16 0
         ret i16 %retval
 ; CHECK-LABEL: @test22(
 ; CHECK-NEXT: ashr i32 %x, 31
-; CHECK-NEXT: trunc i32 
+; CHECK-NEXT: trunc i32
 ; CHECK-NEXT: ret i16
 }
 
 define i1 @test23(i1 %a, i1 %b) {
-        %c = select i1 %a, i1 %b, i1 %a         
+        %c = select i1 %a, i1 %b, i1 %a
         ret i1 %c
 ; CHECK-LABEL: @test23(
 ; CHECK-NEXT: %c = and i1 %a, %b
@@ -395,7 +395,7 @@ define i1 @test23(i1 %a, i1 %b) {
 }
 
 define i1 @test24(i1 %a, i1 %b) {
-        %c = select i1 %a, i1 %a, i1 %b         
+        %c = select i1 %a, i1 %a, i1 %b
         ret i1 %c
 ; CHECK-LABEL: @test24(
 ; CHECK-NEXT: %c = or i1 %a, %b
@@ -406,7 +406,7 @@ define i32 @test25(i1 %c)  {
 entry:
   br i1 %c, label %jump, label %ret
 jump:
-  br label %ret 
+  br label %ret
 ret:
   %a = phi i1 [true, %jump], [false, %entry]
   %b = select i1 %a, i32 10, i32 20
@@ -421,7 +421,7 @@ entry:
   br i1 %cond, label %jump, label %ret
 jump:
   %c = or i1 false, false
-  br label %ret 
+  br label %ret
 ret:
   %a = phi i1 [true, %entry], [%c, %jump]
   %b = select i1 %a, i32 20, i32 10
@@ -435,7 +435,7 @@ define i32 @test27(i1 %c, i32 %A, i32 %B)  {
 entry:
   br i1 %c, label %jump, label %ret
 jump:
-  br label %ret 
+  br label %ret
 ret:
   %a = phi i1 [true, %jump], [false, %entry]
   %b = select i1 %a, i32 %A, i32 %B
@@ -449,7 +449,7 @@ define i32 @test28(i1 %cond, i32 %A, i32 %B)  {
 entry:
   br i1 %cond, label %jump, label %ret
 jump:
-  br label %ret 
+  br label %ret
 ret:
   %c = phi i32 [%A, %jump], [%B, %entry]
   %a = phi i1 [true, %jump], [false, %entry]
@@ -464,12 +464,12 @@ define i32 @test29(i1 %cond, i32 %A, i32 %B)  {
 entry:
   br i1 %cond, label %jump, label %ret
 jump:
-  br label %ret 
+  br label %ret
 ret:
   %c = phi i32 [%A, %jump], [%B, %entry]
   %a = phi i1 [true, %jump], [false, %entry]
   br label %next
-  
+
 next:
   %b = select i1 %a, i32 %A, i32 %c
   ret i32 %b
@@ -483,7 +483,7 @@ next:
 define i32 @test30(i32 %x, i32 %y) {
   %cmp = icmp sgt i32 %x, %y
   %cond = select i1 %cmp, i32 %x, i32 %y
-  
+
   %cmp5 = icmp sgt i32 %cond, %x
   %retval = select i1 %cmp5, i32 %cond, i32 %x
   ret i32 %retval
@@ -493,7 +493,7 @@ define i32 @test30(i32 %x, i32 %y) {
 
 ; UMAX(UMAX(x, y), x) -> UMAX(x, y)
 define i32 @test31(i32 %x, i32 %y) {
-  %cmp = icmp ugt i32 %x, %y 
+  %cmp = icmp ugt i32 %x, %y
   %cond = select i1 %cmp, i32 %x, i32 %y
   %cmp5 = icmp ugt i32 %cond, %x
   %retval = select i1 %cmp5, i32 %cond, i32 %x
@@ -723,6 +723,53 @@ define i48 @test51(<3 x i1> %icmp, <3 x i16> %tmp) {
   ret i48 %tmp2
 }
 
+; Allow select promotion even if there are multiple uses of bitcasted ops.
+; Hoisting the selects allows later pattern matching to see that these are min/max ops.
+
+define void @min_max_bitcast(<4 x float> %a, <4 x float> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
+; CHECK-LABEL: @min_max_bitcast(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x float> %a, %b
+; CHECK-NEXT:    [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %a, <4 x float> %b
+; CHECK-NEXT:    [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32>* %ptr1 to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[SEL1_V]], <4 x float>* [[TMP1]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32>* %ptr2 to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[SEL2_V]], <4 x float>* [[TMP2]], align 16
+; CHECK-NEXT:    ret void
+;
+  %cmp = fcmp olt <4 x float> %a, %b
+  %bc1 = bitcast <4 x float> %a to <4 x i32>
+  %bc2 = bitcast <4 x float> %b to <4 x i32>
+  %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
+  %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
+  store <4 x i32> %sel1, <4 x i32>* %ptr1
+  store <4 x i32> %sel2, <4 x i32>* %ptr2
+  ret void
+}
+
+; To avoid potential backend problems, we don't do the same transform for other casts.
+
+define void @truncs_before_selects(<4 x float> %f1, <4 x float> %f2, <4 x i64> %a, <4 x i64> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
+; CHECK-LABEL: @truncs_before_selects(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x float> %f1, %f2
+; CHECK-NEXT:    [[BC1:%.*]] = trunc <4 x i64> %a to <4 x i32>
+; CHECK-NEXT:    [[BC2:%.*]] = trunc <4 x i64> %b to <4 x i32>
+; CHECK-NEXT:    [[SEL1:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[BC1]], <4 x i32> [[BC2]]
+; CHECK-NEXT:    [[SEL2:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[BC2]], <4 x i32> [[BC1]]
+; CHECK-NEXT:    store <4 x i32> [[SEL1]], <4 x i32>* %ptr1, align 16
+; CHECK-NEXT:    store <4 x i32> [[SEL2]], <4 x i32>* %ptr2, align 16
+; CHECK-NEXT:    ret void
+;
+  %cmp = fcmp olt <4 x float> %f1, %f2
+  %bc1 = trunc <4 x i64> %a to <4 x i32>
+  %bc2 = trunc <4 x i64> %b to <4 x i32>
+  %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
+  %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
+  store <4 x i32> %sel1, <4 x i32>* %ptr1, align 16
+  store <4 x i32> %sel2, <4 x i32>* %ptr2, align 16
+  ret void
+}
+
 ; PR8575
 
 define i32 @test52(i32 %n, i32 %m) nounwind {
@@ -755,7 +802,7 @@ define i32 @test54(i32 %X, i32 %Y) {
 ; CHECK-NOT: ashr
 ; CHECK-NOT: select
 ; CHECK: icmp ne i32 %X, 0
-; CHECK: zext 
+; CHECK: zext
 ; CHECK: ret
 }
 
@@ -833,7 +880,7 @@ define i32 @test61(i32* %ptr) {
 
 define i1 @test62(i1 %A, i1 %B) {
         %not = xor i1 %A, true
-        %C = select i1 %A, i1 %not, i1 %B             
+        %C = select i1 %A, i1 %not, i1 %B
         ret i1 %C
 ; CHECK-LABEL: @test62(
 ; CHECK: %not = xor i1 %A, true
@@ -843,7 +890,7 @@ define i1 @test62(i1 %A, i1 %B) {
 
 define i1 @test63(i1 %A, i1 %B) {
         %not = xor i1 %A, true
-        %C = select i1 %A, i1 %B, i1 %not         
+        %C = select i1 %A, i1 %B, i1 %not
         ret i1 %C
 ; CHECK-LABEL: @test63(
 ; CHECK: %not = xor i1 %A, true
-- 
2.50.1