  case Intrinsic::uadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::usub_sat:
+  case Intrinsic::smul_fix:
+  case Intrinsic::smul_fix_sat:
  case Intrinsic::convert_from_fp16:
  case Intrinsic::convert_to_fp16:
  case Intrinsic::bitreverse:
  }
}
+  if (const auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+    if (const auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+      if (const auto *Op3 = dyn_cast<ConstantInt>(Operands[2])) {
+        switch (IntrinsicID) {
+        default: break;
+        case Intrinsic::smul_fix:
+        case Intrinsic::smul_fix_sat: {
+          // This code performs rounding towards negative infinity in case the
+          // result cannot be represented exactly for the given scale. Targets
+          // that do care about rounding should use a target hook for specifying
+          // how rounding should be done, and provide their own folding to be
+          // consistent with rounding. This is the same approach as used by
+          // DAGTypeLegalizer::ExpandIntRes_MULFIX.
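+          // For example, with a scale of 1 a product of -9 is shifted down to
+          // -5 (rounding -4.5 towards negative infinity) rather than up to -4.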
+          APInt Lhs = Op1->getValue();
+          APInt Rhs = Op2->getValue();
+          unsigned Scale = Op3->getValue().getZExtValue();
+          unsigned Width = Lhs.getBitWidth();
+          assert(Scale < Width && "Illegal scale.");
+          unsigned ExtendedWidth = Width * 2;
+          APInt Product = (Lhs.sextOrSelf(ExtendedWidth) *
+                           Rhs.sextOrSelf(ExtendedWidth)).ashr(Scale);
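+          // The saturating variant clamps the extended-width product to the
+          // signed min/max of the original width before truncating.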
+          if (IntrinsicID == Intrinsic::smul_fix_sat) {
+            APInt MaxValue =
+                APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
+            APInt MinValue =
+                APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
+            Product = APIntOps::smin(Product, MaxValue);
+            Product = APIntOps::smax(Product, MinValue);
+          }
+          return ConstantInt::get(Ty->getContext(),
+                                  Product.sextOrTrunc(Width));
+        }
+        }
+      }
+    }
+  }
+
if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
const APInt *C0, *C1, *C2;
if (!getConstIntOrUndef(Operands[0], C0) ||
        Lane[J] = Operands[J];
        continue;
      }
+      // These intrinsics use a scalar type (the i32 scale) for their third
+      // argument, so it is passed through as-is rather than split per lane.
+      if (J == 2 &&
+          (IntrinsicID == Intrinsic::smul_fix ||
+           IntrinsicID == Intrinsic::smul_fix_sat)) {
+        Lane[J] = Operands[J];
+        continue;
+      }
      Constant *Agg = Operands[J]->getAggregateElement(I);
      if (!Agg)
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; Simple test using a scalar type.
+;-----------------------------------------------------------------------------
+
+declare i32 @llvm.smul.fix.sat.i32(i32, i32, i32)
+
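+; With a scale of 31, i32 1073741824 (2^30) represents 0.5. The product 2^60
+; shifted right by 31 is 2^29 = 536870912, i.e. 0.25, which is in range, so
+; the result does not saturate.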
+define i32 @test_smul_fix_sat_i32_0() {
+; CHECK-LABEL: @test_smul_fix_sat_i32_0(
+; CHECK-NEXT: ret i32 536870912
+;
+ %r = call i32 @llvm.smul.fix.sat.i32(i32 1073741824, i32 1073741824, i32 31) ; 0.5 * 0.5
+ ret i32 %r
+}
+
+;-----------------------------------------------------------------------------
+; More extensive tests based on vectors (basically applying the scalar fold
+; to each lane).
+;-----------------------------------------------------------------------------
+
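+; Each lane is folded independently. With a scale of 0, the first lane below
+; computes -4 * -4 = 16, which saturates to the i3 maximum 3.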
+declare <8 x i3> @llvm.smul.fix.sat.v8i3(<8 x i3>, <8 x i3>, i32)
+
+define <8 x i3> @test_smul_fix_sat_v8i3_0() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_0(
+; CHECK-NEXT: ret <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 0, i3 -4, i3 -4, i3 -4>
+;
+ %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+ i32 0)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_1() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_1(
+; CHECK-NEXT: ret <8 x i3> <i3 3, i3 3, i3 3, i3 2, i3 0, i3 -2, i3 -4, i3 -4>
+;
+ %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+ i32 1)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_2() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_2(
+; CHECK-NEXT: ret <8 x i3> <i3 3, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+ %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+ i32 2)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_3() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_3(
+; CHECK-NEXT: ret <8 x i3> <i3 3, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+ %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+ i32 0)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_4() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_4(
+; CHECK-NEXT: ret <8 x i3> <i3 2, i3 1, i3 1, i3 0, i3 0, i3 -1, i3 -1, i3 -2>
+;
+ %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+ i32 1)
+ ret <8 x i3> %r
+}
+
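+; The last three lanes below show the rounding mode: products -1, -2 and -3
+; shifted right by 2 all round towards negative infinity, giving i3 -1.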
+define <8 x i3> @test_smul_fix_sat_v8i3_5() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_5(
+; CHECK-NEXT: ret <8 x i3> <i3 1, i3 0, i3 0, i3 0, i3 0, i3 -1, i3 -1, i3 -1>
+;
+ %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+ i32 2)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_6() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_6(
+; CHECK-NEXT: ret <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -3, i3 0, i3 3, i3 3, i3 3>
+;
+ %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+ i32 0)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_7() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_7(
+; CHECK-NEXT: ret <8 x i3> <i3 -4, i3 -4, i3 -3, i3 -2, i3 0, i3 1, i3 3, i3 3>
+;
+ %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+ i32 1)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_8() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_8(
+; CHECK-NEXT: ret <8 x i3> <i3 -3, i3 -3, i3 -2, i3 -1, i3 0, i3 0, i3 1, i3 2>
+;
+ %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+ i32 2)
+ ret <8 x i3> %r
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; Simple test using a scalar type.
+;-----------------------------------------------------------------------------
+
+declare i32 @llvm.smul.fix.i32(i32, i32, i32)
+
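+; With a scale of 31, i32 1073741824 (2^30) represents 0.5. The product 2^60
+; shifted right by 31 is 2^29 = 536870912, i.e. 0.25.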
+define i32 @test_smul_fix_i32_0() {
+; CHECK-LABEL: @test_smul_fix_i32_0(
+; CHECK-NEXT: ret i32 536870912
+;
+ %r = call i32 @llvm.smul.fix.i32(i32 1073741824, i32 1073741824, i32 31) ; 0.5 * 0.5
+ ret i32 %r
+}
+
+;-----------------------------------------------------------------------------
+; More extensive tests based on vectors (basically applying the scalar fold
+; to each lane).
+;-----------------------------------------------------------------------------
+
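+; Each lane is folded independently. With a scale of 0, the first lane below
+; computes -4 * -4 = 16, which wraps to 0 when truncated to i3.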
+declare <8 x i3> @llvm.smul.fix.v8i3(<8 x i3>, <8 x i3>, i32)
+
+define <8 x i3> @test_smul_fix_v8i3_0() {
+; CHECK-LABEL: @test_smul_fix_v8i3_0(
+; CHECK-NEXT: ret <8 x i3> <i3 0, i3 -4, i3 0, i3 -4, i3 0, i3 -4, i3 0, i3 -4>
+;
+ %r = call <8 x i3> @llvm.smul.fix.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+ i32 0)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_1() {
+; CHECK-LABEL: @test_smul_fix_v8i3_1(
+; CHECK-NEXT: ret <8 x i3> <i3 0, i3 -2, i3 -4, i3 2, i3 0, i3 -2, i3 -4, i3 2>
+;
+ %r = call <8 x i3> @llvm.smul.fix.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+ i32 1)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_2() {
+; CHECK-LABEL: @test_smul_fix_v8i3_2(
+; CHECK-NEXT: ret <8 x i3> <i3 -4, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+ %r = call <8 x i3> @llvm.smul.fix.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+ i32 2)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_3() {
+; CHECK-LABEL: @test_smul_fix_v8i3_3(
+; CHECK-NEXT: ret <8 x i3> <i3 -4, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+ %r = call <8 x i3> @llvm.smul.fix.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+ i32 0)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_4() {
+; CHECK-LABEL: @test_smul_fix_v8i3_4(
+; CHECK-NEXT: ret <8 x i3> <i3 2, i3 1, i3 1, i3 0, i3 0, i3 -1, i3 -1, i3 -2>
+;
+ %r = call <8 x i3> @llvm.smul.fix.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+ i32 1)
+ ret <8 x i3> %r
+}
+
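+; The last three lanes below show the rounding mode: products -1, -2 and -3
+; shifted right by 2 all round towards negative infinity, giving i3 -1.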
+define <8 x i3> @test_smul_fix_v8i3_5() {
+; CHECK-LABEL: @test_smul_fix_v8i3_5(
+; CHECK-NEXT: ret <8 x i3> <i3 1, i3 0, i3 0, i3 0, i3 0, i3 -1, i3 -1, i3 -1>
+;
+ %r = call <8 x i3> @llvm.smul.fix.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+ i32 2)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_6() {
+; CHECK-LABEL: @test_smul_fix_v8i3_6(
+; CHECK-NEXT: ret <8 x i3> <i3 -4, i3 -1, i3 2, i3 -3, i3 0, i3 3, i3 -2, i3 1>
+;
+ %r = call <8 x i3> @llvm.smul.fix.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+ i32 0)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_7() {
+; CHECK-LABEL: @test_smul_fix_v8i3_7(
+; CHECK-NEXT: ret <8 x i3> <i3 2, i3 3, i3 -3, i3 -2, i3 0, i3 1, i3 3, i3 -4>
+;
+ %r = call <8 x i3> @llvm.smul.fix.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+ i32 1)
+ ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_8() {
+; CHECK-LABEL: @test_smul_fix_v8i3_8(
+; CHECK-NEXT: ret <8 x i3> <i3 -3, i3 -3, i3 -2, i3 -1, i3 0, i3 0, i3 1, i3 2>
+;
+ %r = call <8 x i3> @llvm.smul.fix.v8i3(
+ <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+ <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+ i32 2)
+ ret <8 x i3> %r
+}