};
} // namespace
+/// Match a pattern for a bitwise rotate operation that partially guards
+/// against undefined behavior by branching around the rotation when the shift
+/// amount is 0.
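+///
+/// For example, a C source function like the following (illustrative only,
+/// not part of this patch) is typically compiled to the guarded-rotate IR
+/// matched below:
+///
+/// \code
+///   unsigned rotl32(unsigned X, unsigned Y) {
+///     if (Y == 0) // Branch around the rotate; 'X >> (32 - 0)' would be UB.
+///       return X;
+///     return (X << Y) | (X >> (32 - Y));
+///   }
+/// \endcode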
+static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+ if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
+ return false;
+
+ // As with the one-use checks below, this is not strictly necessary, but we
+ // are being cautious to avoid potential perf regressions on targets that
+ // do not actually have a rotate instruction (where the funnel shift would be
+ // expanded back into math/shift/logic ops).
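+ // (For power-of-2 widths, that expansion can reduce the shift amount with a
+ // mask; other widths would need a urem, as noted in the tests.)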
+ if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
+ return false;
+
+ // Match V against a rotate-left/right pattern, capturing the source operand
+ // and shift amount in X and Y, and return the equivalent funnel shift
+ // intrinsic (or not_intrinsic if there is no match).
+ auto matchRotate = [](Value *V, Value *&X, Value *&Y) {
+ Value *L0, *L1, *R0, *R1;
+ unsigned Width = V->getType()->getScalarSizeInBits();
+ auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1));
+
+ // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y))
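+ // (This identity holds only for 0 < Y < Width; the Y == 0 case, where the
+ // right shift would be UB, is exactly what the guard branch handles.)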
+ auto RotL = m_OneUse(m_c_Or(m_Shl(m_Value(L0), m_Value(L1)),
+ m_LShr(m_Value(R0), Sub)));
+ if (RotL.match(V) && L0 == R0 && L1 == R1) {
+ X = L0;
+ Y = L1;
+ return Intrinsic::fshl;
+ }
+
+ // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y))
+ auto RotR = m_OneUse(m_c_Or(m_LShr(m_Value(L0), m_Value(L1)),
+ m_Shl(m_Value(R0), Sub)));
+ if (RotR.match(V) && L0 == R0 && L1 == R1) {
+ X = L0;
+ Y = L1;
+ return Intrinsic::fshr;
+ }
+
+ return Intrinsic::not_intrinsic;
+ };
+
+ // One phi operand must be a rotate operation, and the other phi operand must
+ // be the source value of that rotate operation:
+ // phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ]
+ PHINode &Phi = cast<PHINode>(I);
+ Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
+ Value *RotSrc, *RotAmt;
+ Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt);
+ if (IID == Intrinsic::not_intrinsic || RotSrc != P1) {
+ IID = matchRotate(P1, RotSrc, RotAmt);
+ if (IID == Intrinsic::not_intrinsic || RotSrc != P0)
+ return false;
+ assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
+ "Pattern must match funnel shift left or right");
+ }
+
+ // The incoming block with our source operand must be the "guard" block.
+ // That must contain a cmp+branch to avoid the rotate when the shift amount
+ // is equal to 0. The other incoming block is the block with the rotate.
+ BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
+ BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
+ Instruction *TermI = GuardBB->getTerminator();
+ BasicBlock *TrueBB, *FalseBB;
+ ICmpInst::Predicate Pred;
+ if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()),
+ TrueBB, FalseBB)))
+ return false;
+
+ BasicBlock *PhiBB = Phi.getParent();
+ if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB)
+ return false;
+
+ // We matched a variation of this IR pattern:
+ // GuardBB:
+ // %cmp = icmp eq i32 %RotAmt, 0
+ // br i1 %cmp, label %PhiBB, label %RotBB
+ // RotBB:
+ // %sub = sub i32 32, %RotAmt
+ // %shr = lshr i32 %X, %sub
+ // %shl = shl i32 %X, %RotAmt
+ // %rot = or i32 %shr, %shl
+ // br label %PhiBB
+ // PhiBB:
+ // %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ]
+ // -->
+ // llvm.fshl.i32(i32 %X, i32 %X, i32 %RotAmt)
+ IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+ Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
+ Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt}));
+ return true;
+}
+
/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
/// of 'and' ops, then we also need to capture the fact that we saw an
// Also, we want to avoid matching partial patterns.
// TODO: It would be more efficient if we removed dead instructions
// iteratively in this loop rather than waiting until the end.
- for (Instruction &I : make_range(BB.rbegin(), BB.rend()))
+ for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
MadeChange |= foldAnyOrAllBitsSet(I);
+ MadeChange |= foldGuardedRotateToFunnelShift(I);
+ }
}
// We're done with transforms, so remove dead instructions.
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[ROTBB]] ], [ 2, [[ENTRY]] ]
-; CHECK-NEXT: [[RES:%.*]] = or i32 [[COND]], [[OTHER]]
+; CHECK-NEXT: [[OTHER:%.*]] = phi i32 [ 1, [[ROTBB]] ], [ 2, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: [[RES:%.*]] = or i32 [[TMP0]], [[OTHER]]
; CHECK-NEXT: ret i32 [[RES]]
;
entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
-; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[OR]], [[ROTBB]] ], [ [[A]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%cmp = icmp eq i32 %b, 0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK: rotbb:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shr, %shl
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+define i32 @rotr_commute_or(i32 %a, i32 %b) {
+; CHECK-LABEL: @rotr_commute_or(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshr.i32(i32 [[A:%.*]], i32 [[A]], i32 [[B]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+; Negative test - non-power-of-2 might require urem expansion in the backend.
+
+define i12 @could_be_rotr_weird_type(i12 %a, i12 %b) {
+; CHECK-LABEL: @could_be_rotr_weird_type(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i12 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i12 12, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i12 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i12 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i12 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i12 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i12 [[COND]]
+;
+entry:
+ %cmp = icmp eq i12 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i12 12, %b
+ %shl = shl i12 %a, %sub
+ %shr = lshr i12 %a, %b
+ %or = or i12 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i12 [ %a, %entry ], [ %or, %rotbb ]
+ ret i12 %cond
+}
+
+; Negative test - wrong phi ops.
+
+define i32 @not_rotr_1(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[B]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i32 [ %b, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+; Negative test - too many phi ops.
+
+define i32 @not_rotr_2(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @not_rotr_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: [[CMP42:%.*]] = icmp ugt i32 [[OR]], 42
+; CHECK-NEXT: br i1 [[CMP42]], label [[END]], label [[BOGUS:%.*]]
+; CHECK: bogus:
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ], [ [[C:%.*]], [[BOGUS]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shl, %shr
+ %cmp42 = icmp ugt i32 %or, 42
+ br i1 %cmp42, label %end, label %bogus
+
+bogus:
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ], [ %c, %bogus ]
+ ret i32 %cond
+}
+
+; Negative test - wrong cmp (but this should match?).
+
+define i32 @not_rotr_3(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %cmp = icmp sle i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+; Negative test - wrong shift.
+
+define i32 @not_rotr_4(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
rotbb:
%sub = sub i32 32, %b
%shl = shl i32 %a, %sub
+ %shr = ashr i32 %a, %b
+ %or = or i32 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+; Negative test - wrong shift.
+
+define i32 @not_rotr_5(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 32, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[B]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A:%.*]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 32, %b
+ %shl = shl i32 %b, %sub
%shr = lshr i32 %a, %b
- %or = or i32 %shr, %shl
+ %or = or i32 %shl, %shr
br label %end
end:
ret i32 %cond
}
-define i32 @rotr_commute_or(i32 %a, i32 %b) {
-; CHECK-LABEL: @rotr_commute_or(
+; Negative test - wrong sub.
+
+define i32 @not_rotr_6(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_rotr_6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
+; CHECK: rotbb:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 8, [[B]]
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %cmp = icmp eq i32 %b, 0
+ br i1 %cmp, label %end, label %rotbb
+
+rotbb:
+ %sub = sub i32 8, %b
+ %shl = shl i32 %a, %sub
+ %shr = lshr i32 %a, %b
+ %or = or i32 %shl, %shr
+ br label %end
+
+end:
+ %cond = phi i32 [ %a, %entry ], [ %or, %rotbb ]
+ ret i32 %cond
+}
+
+; Negative test - extra use. Technically, we could transform this
+; because it doesn't increase the instruction count, but we're
+; being cautious not to cause a potential perf pessimization for
+; targets that do not have a rotate instruction.
+
+define i32 @could_be_rotr(i32 %a, i32 %b, i32* %p) {
+; CHECK-LABEL: @could_be_rotr(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[ROTBB:%.*]]
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[A:%.*]], [[SUB]]
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[A]], [[B]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: store i32 [[OR]], i32* [[P:%.*]]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[A]], [[ENTRY:%.*]] ], [ [[OR]], [[ROTBB]] ]
%shl = shl i32 %a, %sub
%shr = lshr i32 %a, %b
%or = or i32 %shl, %shr
+ store i32 %or, i32* %p
br label %end
end: