};
} // namespace
+/// Match a pattern for a bitwise rotate operation that partially guards
+/// against undefined behavior by branching around the rotation when the shift
+/// amount is 0.
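+///
+/// For example, a C source function like the following (illustrative only,
+/// not part of this patch) is typically compiled to the guarded-rotate IR
+/// matched below:
+///
+/// \code
+///   unsigned rotl32(unsigned X, unsigned Y) {
+///     if (Y == 0) // Branch around the rotate; 'X >> (32 - 0)' would be UB.
+///       return X;
+///     return (X << Y) | (X >> (32 - Y));
+///   }
+/// \endcode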
+static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+ if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
+ return false;
+
+ // As with the one-use checks below, this is not strictly necessary, but we
+ // are being cautious to avoid potential perf regressions on targets that
+ // do not actually have a rotate instruction (where the funnel shift would be
+ // expanded back into math/shift/logic ops).
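+ // (For power-of-2 widths, that expansion can reduce the shift amount with a
+ // mask; other widths would need a urem, as noted in the tests.)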
+ if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
+ return false;
+
+ // Match V against a rotate-left/right pattern, capturing the source operand
+ // and shift amount in X and Y, and return the equivalent funnel shift
+ // intrinsic (or not_intrinsic if there is no match).
+ auto matchRotate = [](Value *V, Value *&X, Value *&Y) {
+ Value *L0, *L1, *R0, *R1;
+ unsigned Width = V->getType()->getScalarSizeInBits();
+ auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1));
+
+ // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y))
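+ // (This identity holds only for 0 < Y < Width; the Y == 0 case, where the
+ // right shift would be UB, is exactly what the guard branch handles.)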
+ auto RotL = m_OneUse(m_c_Or(m_Shl(m_Value(L0), m_Value(L1)),
+ m_LShr(m_Value(R0), Sub)));
+ if (RotL.match(V) && L0 == R0 && L1 == R1) {
+ X = L0;
+ Y = L1;
+ return Intrinsic::fshl;
+ }
+
+ // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y))
+ auto RotR = m_OneUse(m_c_Or(m_LShr(m_Value(L0), m_Value(L1)),
+ m_Shl(m_Value(R0), Sub)));
+ if (RotR.match(V) && L0 == R0 && L1 == R1) {
+ X = L0;
+ Y = L1;
+ return Intrinsic::fshr;
+ }
+
+ return Intrinsic::not_intrinsic;
+ };
+
+ // One phi operand must be a rotate operation, and the other phi operand must
+ // be the source value of that rotate operation:
+ // phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ]
+ PHINode &Phi = cast<PHINode>(I);
+ Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
+ Value *RotSrc, *RotAmt;
+ Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt);
+ if (IID == Intrinsic::not_intrinsic || RotSrc != P1) {
+ IID = matchRotate(P1, RotSrc, RotAmt);
+ if (IID == Intrinsic::not_intrinsic || RotSrc != P0)
+ return false;
+ assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
+ "Pattern must match funnel shift left or right");
+ }
+
+ // The incoming block with our source operand must be the "guard" block.
+ // That must contain a cmp+branch to avoid the rotate when the shift amount
+ // is equal to 0. The other incoming block is the block with the rotate.
+ BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
+ BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
+ Instruction *TermI = GuardBB->getTerminator();
+ BasicBlock *TrueBB, *FalseBB;
+ ICmpInst::Predicate Pred;
+ if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()),
+ TrueBB, FalseBB)))
+ return false;
+
+ BasicBlock *PhiBB = Phi.getParent();
+ if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB)
+ return false;
+
+ // We matched a variation of this IR pattern:
+ // GuardBB:
+ // %cmp = icmp eq i32 %RotAmt, 0
+ // br i1 %cmp, label %PhiBB, label %RotBB
+ // RotBB:
+ // %sub = sub i32 32, %RotAmt
+ // %shr = lshr i32 %X, %sub
+ // %shl = shl i32 %X, %RotAmt
+ // %rot = or i32 %shr, %shl
+ // br label %PhiBB
+ // PhiBB:
+ // %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ]
+ // -->
+ // llvm.fshl.i32(i32 %X, i32 %X, i32 %RotAmt)
+ IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+ Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
+ Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt}));
+ return true;
+}
+
/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
/// of 'and' ops, then we also need to capture the fact that we saw an
// Also, we want to avoid matching partial patterns.
// TODO: It would be more efficient if we removed dead instructions
// iteratively in this loop rather than waiting until the end.
- for (Instruction &I : make_range(BB.rbegin(), BB.rend()))
+ for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
MadeChange |= foldAnyOrAllBitsSet(I);
+ MadeChange |= foldGuardedRotateToFunnelShift(I);
+ }
}
// We're done with transforms, so remove dead instructions.
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[ROTBB]] ], [ 2, [[ENTRY]] ]
-; CHECK-NEXT: [[RES:%.*]] = or i32 [[COND]], [[OTHER]]
+; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[ROTBB]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: [[RES:%.*]] = or i32 [[TMP0]], [[OTHER]]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shr, %shl
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+define i32 @rotr_commute_or(i32 %a, i32 %b) {
+; CHECK-LABEL: @rotr_commute_or(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+; Negative test - non-power-of-2 might require urem expansion in the backend.
+
+define i12 @could_be_rotr_weird_type(i12 %a, i12 %b) {
+; CHECK-LABEL: @could_be_rotr_weird_type(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i12 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i12 12, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i12 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i12 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i12 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i12 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i12 [[COND]]
+;
+entry:
+ %cmp = icmp eq i12 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i12 12, %b
+ %shl = shl i12 %a, %sub
+ %shr = lshr i12 %a, %b
+ %or = or i12 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i12 [ %a, %entry ], [ %or, %rotbb ]
+ ret i12 %cond
+}
+
+; Negative test - wrong phi ops.
+
+define i32 @not_rotr_1(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i32 [ %b, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+; Negative test - too many phi ops.
+
+define i32 @not_rotr_2(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @not_rotr_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[CMP42:%.*]] = icmp ugt i32 [[OR]], 42
+; CHECK-NEXT: br i1 [[CMP42]], label [[END]], label [[BOGUS:%.*]]
+; CHECK: bogus:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ], [ [[C:%.*]], [[BOGUS]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shl, %shr
+ %cmp42 = icmp ugt i32 %or, 42
+ br i1 %cmp42, label %end, label %bogus
+
+bogus:
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ], [ %c, %bogus ]
+ ret i32 %cond
+}
+
+; Negative test - wrong cmp (but this should match?).
+
+define i32 @not_rotr_3(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %cmp = icmp sle i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+; Negative test - wrong shift.
+
+define i32 @not_rotr_4(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %a, %sub
+ %shr = ashr i32 %a, %b
+ %or = or i32 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+; Negative test - wrong shift.
+
+define i32 @not_rotr_5(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[B]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %b, %sub
%shr = lshr i32 %a, %b
- %or = or i32 %shr, %shl
+ %or = or i32 %shl, %shr
br label %end
end:
ret i32 %cond
}
-define i32 @rotr_commute_or(i32 %a, i32 %b) {
-; CHECK-LABEL: @rotr_commute_or(
+; Negative test - wrong sub.
+
+define i32 @not_rotr_6(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 8, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 8, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+; Negative test - extra use. Technically, we could transform this
+; because it doesn't increase the instruction count, but we're
+; being cautious not to cause a potential perf pessimization for
+; targets that do not have a rotate instruction.
+
+define i32 @could_be_rotr(i32 %a, i32 %b, i32* %p) {
+; CHECK-LABEL: @could_be_rotr(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: store i32 [[OR]], i32* [[P:%.*]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
%shl = shl i32 %a, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shl, %shr
+ store i32 %or, i32* %p
br label %end
end: