[ConstantFolding] Add constant folding for smul.fix and smul.fix.sat

author Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>

Wed, 19 Jun 2019 14:28:03 +0000 (14:28 +0000)

committer Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>

Wed, 19 Jun 2019 14:28:03 +0000 (14:28 +0000)
author Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
Wed, 19 Jun 2019 14:28:03 +0000 (14:28 +0000)
committer Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
Wed, 19 Jun 2019 14:28:03 +0000 (14:28 +0000)
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp

index 88d003864ebd4576261332a02c5c8a54f4012b37..9372904f769d1b180c359581cef7cc4789e5ac9e 100644 (file)
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1422,6 +1422,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
    case Intrinsic::uadd_sat:
    case Intrinsic::ssub_sat:
    case Intrinsic::usub_sat:
+  case Intrinsic::smul_fix:
+  case Intrinsic::smul_fix_sat:
    case Intrinsic::convert_from_fp16:
    case Intrinsic::convert_to_fp16:
    case Intrinsic::bitreverse:
@@ -2198,6 +2200,43 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, unsigned IntrinsicID,
      }
    }
  
+  if (const auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+    if (const auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+      if (const auto *Op3 = dyn_cast<ConstantInt>(Operands[2])) {
+        switch (IntrinsicID) {
+        default: break;
+        case Intrinsic::smul_fix:
+        case Intrinsic::smul_fix_sat: {
+          // This code performs rounding towards negative infinity in case the
+          // result cannot be represented exactly for the given scale. Targets
+          // that do care about rounding should use a target hook for specifying
+          // how rounding should be done, and provide their own folding to be
+          // consistent with rounding. This is the same approach as used by
+          // DAGTypeLegalizer::ExpandIntRes_MULFIX.
+          APInt Lhs = Op1->getValue();
+          APInt Rhs = Op2->getValue();
+          unsigned Scale = Op3->getValue().getZExtValue();
+          unsigned Width = Lhs.getBitWidth();
+          assert(Scale < Width && "Illegal scale.");
+          unsigned ExtendedWidth = Width * 2;
+          APInt Product = (Lhs.sextOrSelf(ExtendedWidth) *
+                           Rhs.sextOrSelf(ExtendedWidth)).ashr(Scale);
+          if (IntrinsicID == Intrinsic::smul_fix_sat) {
+            APInt MaxValue =
+              APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
+            APInt MinValue =
+              APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
+            Product = APIntOps::smin(Product, MaxValue);
+            Product = APIntOps::smax(Product, MinValue);
+          }
+          return ConstantInt::get(Ty->getContext(),
+                                  Product.sextOrTrunc(Width));
+        }
+        }
+      }
+    }
+  }
+
    if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
      const APInt *C0, *C1, *C2;
      if (!getConstIntOrUndef(Operands[0], C0) ||
@@ -2307,6 +2346,13 @@ static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
          Lane[J] = Operands[J];
          continue;
        }
+      // These intrinsics use a scalar type for their third argument.
+      if (J == 2 &&
+          (IntrinsicID == Intrinsic::smul_fix ||
+           IntrinsicID == Intrinsic::smul_fix_sat)) {
+        Lane[J] = Operands[J];
+        continue;
+      }
  
        Constant *Agg = Operands[J]->getAggregateElement(I);
        if (!Agg)
diff --git a/test/Analysis/ConstantFolding/smul-fix-sat.ll b/test/Analysis/ConstantFolding/smul-fix-sat.ll

new file mode 100644 (file)

index 0000000..3caae7c
--- /dev/null
+++ b/test/Analysis/ConstantFolding/smul-fix-sat.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; Simple test using scalar layout.
+;-----------------------------------------------------------------------------
+
+declare i32 @llvm.smul.fix.sat.i32(i32, i32, i32)
+
+define i32 @test_smul_fix_sat_i32_0() {
+; CHECK-LABEL: @test_smul_fix_sat_i32_0(
+; CHECK-NEXT:    ret i32 536870912
+;
+  %r = call i32 @llvm.smul.fix.sat.i32(i32 1073741824, i32 1073741824, i32 31) ; 0.5 * 0.5 = 0.25 at scale 31: (2^30 * 2^30) >> 31 = 2^29, no saturation needed
+  ret i32 %r
+}
+
+;-----------------------------------------------------------------------------
+; More extensive tests based on vectors (basically using the scalar fold
+; for each index).
+;-----------------------------------------------------------------------------
+
+declare <8 x i3> @llvm.smul.fix.sat.v8i3(<8 x i3>, <8 x i3>, i32)
+
+define <8 x i3> @test_smul_fix_sat_v8i3_0() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_0(
+; CHECK-NEXT:    ret <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 0, i3 -4, i3 -4, i3 -4>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_1() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_1(
+; CHECK-NEXT:    ret <8 x i3> <i3 3, i3 3, i3 3, i3 2, i3 0, i3 -2, i3 -4, i3 -4>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_2() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_2(
+; CHECK-NEXT:    ret <8 x i3> <i3 3, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_3() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_3(
+; CHECK-NEXT:    ret <8 x i3> <i3 3, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_4() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_4(
+; CHECK-NEXT:    ret <8 x i3> <i3 2, i3 1, i3 1, i3 0, i3 0, i3 -1, i3 -1, i3 -2>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_5() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_5(
+; CHECK-NEXT:    ret <8 x i3> <i3 1, i3 0, i3 0, i3 0, i3 0, i3 -1, i3 -1, i3 -1>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_6() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_6(
+; CHECK-NEXT:    ret <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -3, i3 0, i3 3, i3 3, i3 3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_7() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_7(
+; CHECK-NEXT:    ret <8 x i3> <i3 -4, i3 -4, i3 -3, i3 -2, i3 0, i3 1, i3 3, i3 3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_8() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_8(
+; CHECK-NEXT:    ret <8 x i3> <i3 -3, i3 -3, i3 -2, i3 -1, i3 0, i3 0, i3 1, i3 2>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 2)
+  ret <8 x i3> %r
+}
diff --git a/test/Analysis/ConstantFolding/smul-fix.ll b/test/Analysis/ConstantFolding/smul-fix.ll

new file mode 100644 (file)

index 0000000..9fcf4a9
--- /dev/null
+++ b/test/Analysis/ConstantFolding/smul-fix.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; Simple test using scalar layout.
+;-----------------------------------------------------------------------------
+
+declare i32 @llvm.smul.fix.i32(i32, i32, i32)
+
+define i32 @test_smul_fix_i32_0() {
+; CHECK-LABEL: @test_smul_fix_i32_0(
+; CHECK-NEXT:    ret i32 536870912
+;
+  %r = call i32 @llvm.smul.fix.i32(i32 1073741824, i32 1073741824, i32 31) ; 0.5 * 0.5 = 0.25 at scale 31: (2^30 * 2^30) >> 31 = 2^29
+  ret i32 %r
+}
+
+;-----------------------------------------------------------------------------
+; More extensive tests based on vectors (basically using the scalar fold
+; for each index).
+;-----------------------------------------------------------------------------
+
+declare <8 x i3> @llvm.smul.fix.v8i3(<8 x i3>, <8 x i3>, i32)
+
+define <8 x i3> @test_smul_fix_v8i3_0() {
+; CHECK-LABEL: @test_smul_fix_v8i3_0(
+; CHECK-NEXT:    ret <8 x i3> <i3 0, i3 -4, i3 0, i3 -4, i3 0, i3 -4, i3 0, i3 -4>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_1() {
+; CHECK-LABEL: @test_smul_fix_v8i3_1(
+; CHECK-NEXT:    ret <8 x i3> <i3 0, i3 -2, i3 -4, i3 2, i3 0, i3 -2, i3 -4, i3 2>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_2() {
+; CHECK-LABEL: @test_smul_fix_v8i3_2(
+; CHECK-NEXT:    ret <8 x i3> <i3 -4, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4, i3 -4>,
+  i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_3() {
+; CHECK-LABEL: @test_smul_fix_v8i3_3(
+; CHECK-NEXT:    ret <8 x i3> <i3 -4, i3 3, i3 2, i3 1, i3 0, i3 -1, i3 -2, i3 -3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_4() {
+; CHECK-LABEL: @test_smul_fix_v8i3_4(
+; CHECK-NEXT:    ret <8 x i3> <i3 2, i3 1, i3 1, i3 0, i3 0, i3 -1, i3 -1, i3 -2>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_5() {
+; CHECK-LABEL: @test_smul_fix_v8i3_5(
+; CHECK-NEXT:    ret <8 x i3> <i3 1, i3 0, i3 0, i3 0, i3 0, i3 -1, i3 -1, i3 -1>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1>,
+  i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_6() {
+; CHECK-LABEL: @test_smul_fix_v8i3_6(
+; CHECK-NEXT:    ret <8 x i3> <i3 -4, i3 -1, i3 2, i3 -3, i3 0, i3 3, i3 -2, i3 1>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_7() {
+; CHECK-LABEL: @test_smul_fix_v8i3_7(
+; CHECK-NEXT:    ret <8 x i3> <i3 2, i3 3, i3 -3, i3 -2, i3 0, i3 1, i3 3, i3 -4>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_8() {
+; CHECK-LABEL: @test_smul_fix_v8i3_8(
+; CHECK-NEXT:    ret <8 x i3> <i3 -3, i3 -3, i3 -2, i3 -1, i3 0, i3 0, i3 1, i3 2>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+  <8 x i3> <i3 -4, i3 -3, i3 -2, i3 -1, i3 0, i3 1, i3 2, i3 3>,
+  <8 x i3> <i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3, i3 3>,
+  i32 2)
+  ret <8 x i3> %r
+}
author	Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
	Wed, 19 Jun 2019 14:28:03 +0000 (14:28 +0000)
committer	Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
	Wed, 19 Jun 2019 14:28:03 +0000 (14:28 +0000)
lib/Analysis/ConstantFolding.cpp		patch | blob | history
test/Analysis/ConstantFolding/smul-fix-sat.ll	[new file with mode: 0644]	patch | blob
test/Analysis/ConstantFolding/smul-fix.ll	[new file with mode: 0644]	patch | blob