II->setArgOperand(2, ModuloC);
return II;
}
- // Canonicalize rotate right by constant to rotate left. This is not
- // entirely arbitrary. For historical reasons, the backend may recognize
- // rotate left patterns but miss rotate right patterns.
- if (II->getIntrinsicID() == Intrinsic::fshr && Op0 == Op1) {
- // fshr X, X, C --> fshl X, X, (BitWidth - C)
+ // Canonicalize funnel shift right by constant to funnel shift left. This
+ // is not entirely arbitrary. For historical reasons, the backend may
+ // recognize rotate left patterns but miss rotate right patterns.
+ if (II->getIntrinsicID() == Intrinsic::fshr) {
+ // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
"Shift amount expected to be modulo bitwidth");
Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
Module *Mod = II->getModule();
Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
- return CallInst::Create(Fshl, { Op0, Op0, LeftShiftC });
+ return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
}
}
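
For illustration, the rewrite above only changes the shift amount: a funnel
shift right by C is a funnel shift left by (BitWidth - C) once C has been
reduced modulo the bit width. Here is a minimal standalone sketch of that
arithmetic; fshrToFshlAmount is a hypothetical helper for this note, not
part of the patch:

    #include <cassert>
    #include <cstdint>

    // Illustrative helper mirroring the transform above: fshr X, Y, C is
    // equivalent to fshl X, Y, (BitWidth - C) when C != 0 (mod BitWidth).
    static uint64_t fshrToFshlAmount(uint64_t ShAmtC, uint64_t BitWidth) {
      uint64_t C = ShAmtC % BitWidth; // shift amounts are taken modulo bitwidth
      assert(C != 0 && "zero shifts are simplified away before this point");
      return BitWidth - C;
    }

    int main() {
      assert(fshrToFshlAmount(34, 33) == 32); // matches the i33 test below
      assert(fshrToFshlAmount(26, 33) == 7);  // matches fshr_both_ops_demanded
      return 0;
    }
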
+ // TODO: Pull this into the block above. We can handle semi-arbitrary vector
+ // shift amount constants as well as splats.
const APInt *SA;
if (match(II->getArgOperand(2), m_APInt(SA))) {
uint64_t ShiftAmt = SA->urem(BitWidth);
assert(ShiftAmt != 0 && "SimplifyCall should have handled zero shift");
- // Normalize to funnel shift left.
- if (II->getIntrinsicID() == Intrinsic::fshr)
- ShiftAmt = BitWidth - ShiftAmt;
+ assert(II->getIntrinsicID() == Intrinsic::fshl &&
+ "All funnel shifts by simple constants should go left");
// fshl(X, 0, C) -> shl X, C
// fshl(X, undef, C) -> shl X, C
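
The two comments above describe folds that become reachable once everything
is in funnel-shift-left form: with a zero (or undef) second operand no bits
are funneled in from the right, so the fshl degenerates to a plain shl. A
numeric sanity check of that identity for i32, written against the reference
semantics of fshl (illustrative code only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Reference semantics of llvm.fshl.i32: conceptually concatenate X:Y,
    // shift left by C, and keep the high 32 bits. With Y == 0 this is
    // exactly X << C.
    static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t C) {
      C %= 32;
      return C ? (X << C) | (Y >> (32 - C)) : X;
    }

    int main() {
      for (uint32_t C = 1; C < 32; ++C)
        assert(fshl32(0xDEADBEEFu, 0, C) == 0xDEADBEEFu << C);
      return 0;
    }

The FileCheck diffs below show the canonicalization firing in the InstCombine
funnel-shift tests: each fshr call with a constant shift amount is now
rewritten to the equivalent fshl.
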
define <2 x i32> @fshr_op0_undef_vec(<2 x i32> %x) {
; CHECK-LABEL: @fshr_op0_undef_vec(
-; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> [[X:%.*]], <2 x i32> <i32 31, i32 1>)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> undef, <2 x i32> [[X:%.*]], <2 x i32> <i32 1, i32 31>)
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 -1, i32 33>)
  ret <2 x i32> %r
}

define <2 x i32> @fshr_op1_zero_vec(<2 x i32> %x) {
; CHECK-LABEL: @fshr_op1_zero_vec(
-; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> zeroinitializer, <2 x i32> <i32 31, i32 1>)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 31>)
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 -1, i32 33>)
  ret <2 x i32> %r
}

define i33 @fshr_constant_shift_amount_modulo_bitwidth(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth(
-; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 1)
+; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 32)
; CHECK-NEXT: ret i33 [[R]]
;
  %r = call i33 @llvm.fshr.i33(i33 %x, i33 %y, i33 34)
  ret i33 %r
}

define <2 x i32> @fshr_constant_shift_amount_modulo_bitwidth_vec(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @fshr_constant_shift_amount_modulo_bitwidth_vec(
-; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> <i32 2, i32 31>)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i32> <i32 30, i32 1>)
; CHECK-NEXT: ret <2 x i32> [[R]]
;
  %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 34, i32 -1>)
  ret <2 x i32> %r
}

define i33 @fshr_both_ops_demanded(i33 %x, i33 %y) {
; CHECK-LABEL: @fshr_both_ops_demanded(
-; CHECK-NEXT: [[Z:%.*]] = call i33 @llvm.fshr.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 26)
+; CHECK-NEXT: [[Z:%.*]] = call i33 @llvm.fshl.i33(i33 [[X:%.*]], i33 [[Y:%.*]], i33 7)
; CHECK-NEXT: [[R:%.*]] = and i33 [[Z]], 192
; CHECK-NEXT: ret i33 [[R]]
;