[InstCombine] fold insertelement into splat of same scalar

author Sanjay Patel <spatel@rotateright.com>

Mon, 8 Jul 2019 19:48:52 +0000 (19:48 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Mon, 8 Jul 2019 19:48:52 +0000 (19:48 +0000)
author Sanjay Patel <spatel@rotateright.com>
Mon, 8 Jul 2019 19:48:52 +0000 (19:48 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Mon, 8 Jul 2019 19:48:52 +0000 (19:48 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

index b928ae5b3d18290dfc50ca4582841d38b75fe14c..dc9abdd7f47a4f8dee781398aeec723ea46b6120 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -732,6 +732,40 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
    return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
  }
  
+/// If InsElt inserts scalar X at a constant index into a zero-element splat of
+/// X, return a new splat shuffle whose mask also covers that index, else null.
+static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
+  // The vector operand (operand 0) must be a shuffle that splats element 0.
+  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
+  if (!Shuf || !Shuf->isZeroEltSplat())
+    return nullptr;
+
+  // The insertion index (operand 2) must be a constant; it is bound to IdxC.
+  uint64_t IdxC;
+  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
+    return nullptr;
+
+  // The shuffle's first input must be 'inselt undef, X, 0' for this scalar X.
+  Value *X = InsElt.getOperand(1);
+  Value *Op0 = Shuf->getOperand(0);
+  if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
+    return nullptr;
+
+  // Build a mask equal to the old one except that lane IdxC selects element 0.
+  // For example:
+  // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
+  //   --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
+  unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
+  SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
+  Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
+  Constant *Zero = ConstantInt::getNullValue(I32Ty);
+  for (unsigned i = 0; i != NumMaskElts; ++i)
+    NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i);
+
+  Constant *NewMask = ConstantVector::get(NewMaskVec);
+  return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
+}
+
  /// If we have an insertelement instruction feeding into another insertelement
  /// and the 2nd is inserting a constant into the vector, canonicalize that
  /// constant insertion before the insertion of a variable:
@@ -950,6 +984,9 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
    if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
      return Broadcast;
  
+  if (Instruction *Splat = foldInsEltIntoSplat(IE))
+    return Splat;
+
    return nullptr;
  }
  
diff --git a/test/Transforms/InstCombine/broadcast.ll b/test/Transforms/InstCombine/broadcast.ll

index 30c552cb1bd270b940ab188cd1227c212f2ca5a6..49ff3129a68d38e6534bdf4a446784ef518acb07 100644 (file)
--- a/test/Transforms/InstCombine/broadcast.ll
+++ b/test/Transforms/InstCombine/broadcast.ll
@@ -126,14 +126,13 @@ define <1 x float> @bad4(float %arg) {
  }
  
  ; Multiple undef elements are ok.
-; TODO: Multiple uses triggers the transform at %t4, but we could form another splat from %t6 and simplify?
+; TODO: Multiple uses triggers the transform at %t4, but we should sink/scalarize/CSE the splats?
  
  define <4 x float> @splat_undef3(float %arg) {
  ; CHECK-LABEL: @splat_undef3(
  ; CHECK-NEXT:    [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
  ; CHECK-NEXT:    [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
-; CHECK-NEXT:    [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2
-; CHECK-NEXT:    [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
+; CHECK-NEXT:    [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer
  ; CHECK-NEXT:    [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]]
  ; CHECK-NEXT:    ret <4 x float> [[T7]]
  ;
diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll

index 7603298c26c4e22a37da715ab9f2841070ebf69d..3a17acd2432e9a3072fee236bdda31500d05aa8f 100644 (file)
--- a/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -502,8 +502,7 @@ define <4 x float> @insert_nonzero_index_splat_wrong_index(float %x, i32 %index)
  define <4 x float> @insert_in_splat(float %x) {
  ; CHECK-LABEL: @insert_in_splat(
  ; CHECK-NEXT:    [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
-; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 undef>
-; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 3
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
  ; CHECK-NEXT:    ret <4 x float> [[R]]
  ;
    %xv = insertelement <4 x float> undef, float %x, i32 0
@@ -518,7 +517,7 @@ define <4 x float> @insert_in_splat_extra_uses(float %x) {
  ; CHECK-NEXT:    call void @use(<4 x float> [[XV]])
  ; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 undef>
  ; CHECK-NEXT:    call void @use(<4 x float> [[SPLAT]])
-; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 3
+; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
  ; CHECK-NEXT:    ret <4 x float> [[R]]
  ;
    %xv = insertelement <4 x float> undef, float %x, i32 0
@@ -529,6 +528,8 @@ define <4 x float> @insert_in_splat_extra_uses(float %x) {
    ret <4 x float> %r
  }
  
+; Negative test - not a constant index insert
+
  define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) {
  ; CHECK-LABEL: @insert_in_splat_variable_index(
  ; CHECK-NEXT:    [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
@@ -542,6 +543,8 @@ define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) {
    ret <4 x float> %r
  }
  
+; Negative test - not a splat shuffle
+
  define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) {
  ; CHECK-LABEL: @insert_in_nonsplat(
  ; CHECK-NEXT:    [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
@@ -555,6 +558,8 @@ define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) {
    ret <4 x float> %r
  }
  
+; Negative test - not a splat shuffle
+
  define <4 x float> @insert_in_nonsplat2(float %x, <4 x float> %y) {
  ; CHECK-LABEL: @insert_in_nonsplat2(
  ; CHECK-NEXT:    [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 0
author	Sanjay Patel <spatel@rotateright.com>
	Mon, 8 Jul 2019 19:48:52 +0000 (19:48 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Mon, 8 Jul 2019 19:48:52 +0000 (19:48 +0000)
lib/Transforms/InstCombine/InstCombineVectorOps.cpp		patch | blob | history
test/Transforms/InstCombine/broadcast.ll		patch | blob | history
test/Transforms/InstCombine/insert-extract-shuffle.ll		patch | blob | history