From eb77a4a5377de1e7065cee9a23caa179e6d6b672 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Wed, 16 Nov 2016 00:44:47 +0000 Subject: [PATCH] [BypassSlowDivision] Handle division by constant numerators better. Summary: We don't do BypassSlowDivision when the denominator is a constant, but we do do it when the numerator is a constant. This patch makes two related changes to BypassSlowDivision when the numerator is a constant: * If the numerator is too large to fit into the bypass width, don't bypass slow division (because we'll never run the smaller-width code). * If we bypass slow division where the numerator is a constant, don't OR together the numerator and denominator when determining whether both operands fit within the bypass width. We need to check only the denominator. Reviewers: tra Subscribers: llvm-commits, jholewinski Differential Revision: https://reviews.llvm.org/D26699 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287062 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/BypassSlowDivision.cpp | 17 ++++++++- .../bypass-slow-div-constant-numerator.ll | 35 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index d74d5299db5..bc2cef26edc 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -89,6 +89,11 @@ static bool insertFastDiv(Instruction *I, IntegerType *BypassType, return false; } + // If the numerator is a constant, bail if it doesn't fit into BypassType. 
+ if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend)) + if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth()) + return false; + // Basic Block is split before divide BasicBlock *MainBB = &*I->getParent(); BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I); @@ -150,7 +155,17 @@ static bool insertFastDiv(Instruction *I, IntegerType *BypassType, // Combine operands into a single value with OR for value testing below MainBB->getInstList().back().eraseFromParent(); IRBuilder<> MainBuilder(MainBB, MainBB->end()); - Value *OrV = MainBuilder.CreateOr(Dividend, Divisor); + + // We should have bailed out above if the divisor is a constant, but the + // dividend may still be a constant. Set OrV to our non-constant operands + // OR'ed together. + assert(!isa<ConstantInt>(Divisor)); + + Value *OrV; + if (!isa<ConstantInt>(Dividend)) + OrV = MainBuilder.CreateOr(Dividend, Divisor); + else + OrV = Divisor; // BitMask is inverted to check if the operands are // larger than the bypass type diff --git a/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll b/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll new file mode 100644 index 00000000000..94adf197093 --- /dev/null +++ b/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div-constant-numerator.ll @@ -0,0 +1,35 @@ +; RUN: opt -S -codegenprepare < %s | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +; When we bypass slow div with a constant numerator which fits into the bypass +; width, we still emit the bypass code, but we don't 'or' the numerator with +; the denominator.
+; CHECK-LABEL: @small_constant_numer +define i64 @small_constant_numer(i64 %a) { + ; CHECK: [[AND:%[0-9]+]] = and i64 %a, -4294967296 + ; CHECK: icmp eq i64 [[AND]], 0 + + ; CHECK: [[TRUNC:%[0-9]+]] = trunc i64 %a to i32 + ; CHECK: udiv i32 -1, [[TRUNC]] + %d = sdiv i64 4294967295, %a ; 0xffff'ffff + ret i64 %d +} + +; When we try to bypass slow div with a constant numerator which *doesn't* fit +; into the bypass width, leave it as a plain 64-bit div with no bypass. +; CHECK-LABEL: @large_constant_numer +define i64 @large_constant_numer(i64 %a) { + ; CHECK-NOT: udiv i32 + %d = sdiv i64 4294967296, %a ; 0x1'0000'0000 + ret i64 %d +} + +; For good measure, try a value larger than 2^32. +; CHECK-LABEL: @larger_constant_numer +define i64 @larger_constant_numer(i64 %a) { + ; CHECK-NOT: udiv i32 + %d = sdiv i64 5000000000, %a + ret i64 %d +} -- 2.40.0