From f99e13e425de288ff5fe82851c8648e9b5fd01e6 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson
Date: Wed, 19 Jun 2019 14:28:03 +0000
Subject: [PATCH] [ConstantFolding] Add constant folding for smul.fix and
 smul.fix.sat

Summary:
This patch teaches ConstantFolding to constant fold both scalar and
vector variants of llvm.smul.fix and llvm.smul.fix.sat.

As described in the LangRef, rounding is unspecified for these
intrinsics. If the result cannot be represented exactly, the default
behavior in ConstantFolding is to round down towards negative infinity
(a small standalone sketch of that computation follows the patch). If a
target has a different preferred rounding, some kind of target hook
would be needed (the same strategy as used by the SelectionDAG
legalizer).

Reviewers: nikic, leonardchan, RKSimon

Reviewed By: leonardchan

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63385

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363811 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Analysis/ConstantFolding.cpp              |  46 +++++++
 test/Analysis/ConstantFolding/smul-fix-sat.ll | 122 ++++++++++++++++++
 test/Analysis/ConstantFolding/smul-fix.ll     | 122 ++++++++++++++++++
 3 files changed, 290 insertions(+)
 create mode 100644 test/Analysis/ConstantFolding/smul-fix-sat.ll
 create mode 100644 test/Analysis/ConstantFolding/smul-fix.ll

diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 88d003864eb..9372904f769 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1422,6 +1422,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::uadd_sat:
   case Intrinsic::ssub_sat:
   case Intrinsic::usub_sat:
+  case Intrinsic::smul_fix:
+  case Intrinsic::smul_fix_sat:
   case Intrinsic::convert_from_fp16:
   case Intrinsic::convert_to_fp16:
   case Intrinsic::bitreverse:
@@ -2198,6 +2200,43 @@ static Constant *ConstantFoldScalarCall3(StringRef Name, unsigned IntrinsicID,
     }
   }

+  if (const auto *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+    if (const auto *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+      if (const auto *Op3 = dyn_cast<ConstantInt>(Operands[2])) {
+        switch (IntrinsicID) {
+        default: break;
+        case Intrinsic::smul_fix:
+        case Intrinsic::smul_fix_sat: {
+          // This code performs rounding towards negative infinity in case the
+          // result cannot be represented exactly for the given scale. Targets
+          // that do care about rounding should use a target hook for
+          // specifying how rounding should be done, and provide their own
+          // folding to be consistent with that rounding. This is the same
+          // approach as used by DAGTypeLegalizer::ExpandIntRes_MULFIX.
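+          // For example, with a scale of 1, folding i32 3 (1.5) times 3
+          // (1.5) yields 4 (2.0), and 3 times -3 yields -5 (-2.5): the exact
+          // products 2.25 and -2.25 both get rounded down.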
+          APInt Lhs = Op1->getValue();
+          APInt Rhs = Op2->getValue();
+          unsigned Scale = Op3->getValue().getZExtValue();
+          unsigned Width = Lhs.getBitWidth();
+          assert(Scale < Width && "Illegal scale.");
+          unsigned ExtendedWidth = Width * 2;
+          APInt Product = (Lhs.sextOrSelf(ExtendedWidth) *
+                           Rhs.sextOrSelf(ExtendedWidth)).ashr(Scale);
+          if (IntrinsicID == Intrinsic::smul_fix_sat) {
+            APInt MaxValue =
+                APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
+            APInt MinValue =
+                APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
+            Product = APIntOps::smin(Product, MaxValue);
+            Product = APIntOps::smax(Product, MinValue);
+          }
+          return ConstantInt::get(Ty->getContext(),
+                                  Product.sextOrTrunc(Width));
+        }
+        }
+      }
+    }
+  }
+
   if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
     const APInt *C0, *C1, *C2;
     if (!getConstIntOrUndef(Operands[0], C0) ||
@@ -2307,6 +2346,13 @@ static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
         Lane[J] = Operands[J];
         continue;
       }
+      // These intrinsics use a scalar type for their third argument.
+      if (J == 2 &&
+          (IntrinsicID == Intrinsic::smul_fix ||
+           IntrinsicID == Intrinsic::smul_fix_sat)) {
+        Lane[J] = Operands[J];
+        continue;
+      }

       Constant *Agg = Operands[J]->getAggregateElement(I);
       if (!Agg)
diff --git a/test/Analysis/ConstantFolding/smul-fix-sat.ll b/test/Analysis/ConstantFolding/smul-fix-sat.ll
new file mode 100644
index 00000000000..3caae7ce56e
--- /dev/null
+++ b/test/Analysis/ConstantFolding/smul-fix-sat.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; Simple test using scalar layout.
+;-----------------------------------------------------------------------------
+
+declare i32 @llvm.smul.fix.sat.i32(i32, i32, i32)
+
+define i32 @test_smul_fix_sat_i32_0() {
+; CHECK-LABEL: @test_smul_fix_sat_i32_0(
+; CHECK-NEXT:    ret i32 536870912
+;
+  %r = call i32 @llvm.smul.fix.sat.i32(i32 1073741824, i32 1073741824, i32 31) ; 0.5 * 0.5
+  ret i32 %r
+}
+
+;-----------------------------------------------------------------------------
+; More extensive tests based on vectors (basically using the scalar fold
+; for each index).
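+; The i32 scale operand stays scalar for the vector variants; each lane is
+; folded with the scalar rule and then clamped to the signed i3 range [-4, 3].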
+;-----------------------------------------------------------------------------
+
+declare <8 x i3> @llvm.smul.fix.sat.v8i3(<8 x i3>, <8 x i3>, i32)
+
+define <8 x i3> @test_smul_fix_sat_v8i3_0() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_0(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_1() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_1(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_2() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_2(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_3() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_3(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_4() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_4(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_5() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_5(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_6() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_6(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_7() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_7(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_sat_v8i3_8() {
+; CHECK-LABEL: @test_smul_fix_sat_v8i3_8(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.sat.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 2)
+  ret <8 x i3> %r
+}
diff --git a/test/Analysis/ConstantFolding/smul-fix.ll b/test/Analysis/ConstantFolding/smul-fix.ll
new file mode 100644
index 00000000000..9fcf4a9d65c
--- /dev/null
+++ b/test/Analysis/ConstantFolding/smul-fix.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -constprop -S | FileCheck %s
+
+;-----------------------------------------------------------------------------
+; Simple test using scalar layout.
+;-----------------------------------------------------------------------------
+
+declare i32 @llvm.smul.fix.i32(i32, i32, i32)
+
+define i32 @test_smul_fix_i32_0() {
+; CHECK-LABEL: @test_smul_fix_i32_0(
+; CHECK-NEXT:    ret i32 536870912
+;
+  %r = call i32 @llvm.smul.fix.i32(i32 1073741824, i32 1073741824, i32 31) ; 0.5 * 0.5
+  ret i32 %r
+}
+
+;-----------------------------------------------------------------------------
+; More extensive tests based on vectors (basically using the scalar fold
+; for each index).
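+; The i32 scale operand stays scalar for the vector variants; with a non-zero
+; scale, inexact lane results round down towards negative infinity, as in the
+; scalar case.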
+;-----------------------------------------------------------------------------
+
+declare <8 x i3> @llvm.smul.fix.v8i3(<8 x i3>, <8 x i3>, i32)
+
+define <8 x i3> @test_smul_fix_v8i3_0() {
+; CHECK-LABEL: @test_smul_fix_v8i3_0(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_1() {
+; CHECK-LABEL: @test_smul_fix_v8i3_1(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_2() {
+; CHECK-LABEL: @test_smul_fix_v8i3_2(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_3() {
+; CHECK-LABEL: @test_smul_fix_v8i3_3(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_4() {
+; CHECK-LABEL: @test_smul_fix_v8i3_4(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_5() {
+; CHECK-LABEL: @test_smul_fix_v8i3_5(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 2)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_6() {
+; CHECK-LABEL: @test_smul_fix_v8i3_6(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 0)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_7() {
+; CHECK-LABEL: @test_smul_fix_v8i3_7(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 1)
+  ret <8 x i3> %r
+}
+
+define <8 x i3> @test_smul_fix_v8i3_8() {
+; CHECK-LABEL: @test_smul_fix_v8i3_8(
+; CHECK-NEXT:    ret <8 x i3>
+;
+  %r = call <8 x i3> @llvm.smul.fix.v8i3(
+                                      <8 x i3> ,
+                                      <8 x i3> ,
+                                      i32 2)
+  ret <8 x i3> %r
+}
--
2.50.1
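As a reader's aid, not part of the patch: below is a minimal standalone C++
sketch of the folded computation for a 32-bit smul.fix / smul.fix.sat, using
int64_t as the doubled "extended" width in place of APInt. The function and
variable names are illustrative only and do not exist in LLVM, and the sketch
assumes the compiler implements right shift of a negative value as an
arithmetic shift (as mainstream compilers do).

// Standalone illustration only; names are made up and not part of LLVM.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Mirrors the fold above for a 32-bit operation: multiply in twice the
// width, shift right by the scale (rounding towards negative infinity),
// optionally clamp to the signed 32-bit range, then truncate back.
static int32_t foldSMulFix(int32_t Lhs, int32_t Rhs, unsigned Scale,
                           bool Saturating) {
  assert(Scale < 32 && "Illegal scale.");
  int64_t Product = ((int64_t)Lhs * (int64_t)Rhs) >> Scale;
  if (Saturating) {
    if (Product > INT32_MAX) Product = INT32_MAX;
    if (Product < INT32_MIN) Product = INT32_MIN;
  }
  return (int32_t)Product; // Truncates when not saturating, like sextOrTrunc.
}

int main() {
  // 0.5 * 0.5 with scale 31 folds to 0.25, as in the scalar tests above.
  printf("%d\n", foldSMulFix(1073741824, 1073741824, 31, false)); // 536870912
  // 1.5 * 1.5 with scale 1: the exact 2.25 rounds down to 2.0.
  printf("%d\n", foldSMulFix(3, 3, 1, false));                    // 4
  // 1.5 * -1.5 with scale 1: the exact -2.25 rounds down to -2.5.
  printf("%d\n", foldSMulFix(3, -3, 1, false));                   // -5
  // Saturating: -1.0 * -1.0 with scale 31 overflows and clamps to the
  // largest representable value.
  printf("%d\n", foldSMulFix(INT32_MIN, INT32_MIN, 31, true));    // 2147483647
  return 0;
}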