From 1bf013b0689a83873d2134dda61e84590f0bfb6d Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 21 May 2019 07:56:47 +0000 Subject: [PATCH] [ARM][CGP] Skip nuw in PrepareConstants PrepareConstants step converts add/sub with 'negative' immediates to sub/add with a 'positive' imm to make promotion more simple. nuw already states that the add shouldn't cause an unsigned wrap, so it shouldn't need any tweaking. Plus, we also don't allow a sub with a 'negative' immediate to be safe wrap, so this functionality has been removed. The PrepareConstants step now just handles the add instructions that we've determined would be safe if they wrap around zero. Differential Revision: https://reviews.llvm.org/D62057 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361227 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMCodeGenPrepare.cpp | 124 ++++++++++------------- test/CodeGen/ARM/CGP/arm-cgp-overflow.ll | 36 ++++++- 2 files changed, 85 insertions(+), 75 deletions(-) diff --git a/lib/Target/ARM/ARMCodeGenPrepare.cpp b/lib/Target/ARM/ARMCodeGenPrepare.cpp index 93a715aa194..83d5b860fbd 100644 --- a/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ b/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -123,9 +123,10 @@ class IRPromoter { SmallPtrSetImpl *Sources; SmallPtrSetImpl *Sinks; SmallPtrSetImpl *SafeToPromote; + SmallPtrSetImpl *SafeWrap; void ReplaceAllUsersOfWith(Value *From, Value *To); - void PrepareConstants(void); + void PrepareWrappingAdds(void); void ExtendSources(void); void ConvertTruncs(void); void PromoteTree(void); @@ -141,7 +142,8 @@ public: SetVector &Visited, SmallPtrSetImpl &Sources, SmallPtrSetImpl &Sinks, - SmallPtrSetImpl &SafeToPromote); + SmallPtrSetImpl &SafeToPromote, + SmallPtrSetImpl &SafeWrap); }; class ARMCodeGenPrepare : public FunctionPass { @@ -149,8 +151,9 @@ class ARMCodeGenPrepare : public FunctionPass { IRPromoter *Promoter = nullptr; std::set AllVisited; SmallPtrSet SafeToPromote; + SmallPtrSet SafeWrap; - bool isSafeOverflow(Instruction *I); + bool isSafeWrap(Instruction *I); bool isSupportedValue(Value *V); bool isLegalToPromote(Value *V); bool TryToPromote(Value *V); @@ -278,19 +281,14 @@ static bool isSink(Value *V) { return isa(V); } -/// Return whether the instruction can be promoted within any modifications to -/// its operands or result. -bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) { - // FIXME Do we need NSW too? - if (isa(I) && I->hasNoUnsignedWrap()) - return true; - - // We can support a, potentially, overflowing instruction (I) if: +/// Return whether this instruction can safely wrap. +bool ARMCodeGenPrepare::isSafeWrap(Instruction *I) { + // We can support a, potentially, wrapping instruction (I) if: // - It is only used by an unsigned icmp. // - The icmp uses a constant. - // - The overflowing value (I) is decreasing, i.e would underflow - wrapping + // - The wrapping value (I) is decreasing, i.e would underflow - wrapping // around zero to become a larger number than before. - // - The underflowing instruction (I) also uses a constant. + // - The wrapping instruction (I) also uses a constant. // // We can then use the two constants to calculate whether the result would // wrap in respect to itself in the original bitwidth. If it doesn't wrap, @@ -392,6 +390,7 @@ bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) { return false; LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n"); + SafeWrap.insert(I); return true; } @@ -415,13 +414,16 @@ static bool shouldPromote(Value *V) { /// Return whether we can safely mutate V's type to ExtTy without having to be /// concerned with zero extending or truncation. static bool isPromotedResultSafe(Value *V) { + if (GenerateSignBits(V)) + return false; + if (!isa(V)) return true; - if (GenerateSignBits(V)) - return false; + if (!isa(V)) + return true; - return !isa(V); + return cast(V)->hasNoUnsignedWrap(); } /// Return the intrinsic for the instruction that can perform the same @@ -469,61 +471,34 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { InstsToRemove.insert(I); } -void IRPromoter::PrepareConstants() { +void IRPromoter::PrepareWrappingAdds() { + LLVM_DEBUG(dbgs() << "ARM CGP: Prepare underflowing adds.\n"); IRBuilder<> Builder{Ctx}; - // First step is to prepare the instructions for mutation. Most constants - // just need to be zero extended into their new type, but complications arise - // because: - // - For nuw binary operators, negative immediates would need sign extending; - // however, instead we'll change them to positive and zext them. We can do - // this because: - // > The operators that can wrap are: add, sub, mul and shl. - // > shl interprets its second operand as unsigned and if the first operand - // is an immediate, it will need zext to be nuw. - // > I'm assuming mul has to interpret immediates as unsigned for nuw. - // > Which leaves the nuw add and sub to be handled; as with shl, if an - // immediate is used as operand 0, it will need zext to be nuw. - // - We also allow add and sub to safely overflow in certain circumstances - // and only when the value (operand 0) is being decreased. - // - // For adds and subs, that are either nuw or safely wrap and use a negative - // immediate as operand 1, we create an equivalent instruction using a - // positive immediate. That positive immediate can then be zext along with - // all the other immediates later. - for (auto *V : *Visited) { - if (!isa(V)) - continue; - auto *I = cast(V); - if (SafeToPromote->count(I)) { - - if (!isa(I)) - continue; - - if (auto *Const = dyn_cast(I->getOperand(1))) { - if (!Const->isNegative()) - continue; + // For adds that safely wrap and use a negative immediate as operand 1, we + // create an equivalent instruction using a positive immediate. + // That positive immediate can then be zext along with all the other + // immediates later. + for (auto *I : *SafeWrap) { + if (I->getOpcode() != Instruction::Add) + continue; - unsigned Opc = I->getOpcode(); - if (Opc != Instruction::Add && Opc != Instruction::Sub) - continue; + LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n"); + assert((isa(I->getOperand(1)) && + cast(I->getOperand(1))->isNegative()) && + "Wrapping should have a negative immediate as the second operand"); - LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n"); - auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); - Builder.SetInsertPoint(I); - Value *NewVal = Opc == Instruction::Sub ? - Builder.CreateAdd(I->getOperand(0), NewConst) : - Builder.CreateSub(I->getOperand(0), NewConst); - LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n"); - - if (auto *NewInst = dyn_cast(NewVal)) { - NewInst->copyIRFlags(I); - NewInsts.insert(NewInst); - } - InstsToRemove.insert(I); - I->replaceAllUsesWith(NewVal); - } + auto Const = cast(I->getOperand(1)); + auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); + Builder.SetInsertPoint(I); + Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst); + if (auto *NewInst = dyn_cast(NewVal)) { + NewInst->copyIRFlags(I); + NewInsts.insert(NewInst); } + InstsToRemove.insert(I); + I->replaceAllUsesWith(NewVal); + LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n"); } for (auto *I : NewInsts) Visited->insert(I); @@ -731,6 +706,8 @@ void IRPromoter::Cleanup() { NewInsts.clear(); TruncTysMap.clear(); Promoted.clear(); + SafeToPromote->clear(); + SafeWrap->clear(); } void IRPromoter::ConvertTruncs() { @@ -762,7 +739,8 @@ void IRPromoter::Mutate(Type *OrigTy, SetVector &Visited, SmallPtrSetImpl &Sources, SmallPtrSetImpl &Sinks, - SmallPtrSetImpl &SafeToPromote) { + SmallPtrSetImpl &SafeToPromote, + SmallPtrSetImpl &SafeWrap) { LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " << ARMCodeGenPrepare::TypeSize << " to 32-bits\n"); @@ -775,6 +753,7 @@ void IRPromoter::Mutate(Type *OrigTy, this->Sources = &Sources; this->Sinks = &Sinks; this->SafeToPromote = &SafeToPromote; + this->SafeWrap = &SafeWrap; // Cache original types of the values that will likely need truncating for (auto *I : Sinks) { @@ -797,9 +776,9 @@ void IRPromoter::Mutate(Type *OrigTy, TruncTysMap[Trunc].push_back(Trunc->getDestTy()); } - // Convert adds and subs using negative immediates to equivalent instructions - // that use positive constants. - PrepareConstants(); + // Convert adds using negative immediates to equivalent instructions that use + // positive constants. + PrepareWrappingAdds(); // Insert zext instructions between sources and their users. ExtendSources(); @@ -889,7 +868,7 @@ bool ARMCodeGenPrepare::isLegalToPromote(Value *V) { if (SafeToPromote.count(I)) return true; - if (isPromotedResultSafe(V) || isSafeOverflow(I)) { + if (isPromotedResultSafe(V) || isSafeWrap(I)) { SafeToPromote.insert(I); return true; } @@ -1025,7 +1004,8 @@ bool ARMCodeGenPrepare::TryToPromote(Value *V) { if (ToPromote < 2) return false; - Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote); + Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote, + SafeWrap); return true; } diff --git a/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll b/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll index d44abf7bcd3..4873b883650 100644 --- a/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll +++ b/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -mattr=-use-misched %s -arm-disable-cgp=false -o - | FileCheck %s ; CHECK: overflow_add ; CHECK: add @@ -194,7 +194,7 @@ entry: } ; CHECK-LABEL: safe_sub_var_imm -; CHECK: add.w [[ADD:r[0-9]+]], r0, #8 +; CHECK: sub.w [[ADD:r[0-9]+]], r0, #248 ; CHECK-NOT: uxt ; CHECK: cmp [[ADD]], #252 define i32 @safe_sub_var_imm(i8* %b) { @@ -220,7 +220,7 @@ entry: } ; CHECK-LABEL: safe_add_var_imm -; CHECK: sub.w [[SUB:r[0-9]+]], r0, #127 +; CHECK: add.w [[SUB:r[0-9]+]], r0, #129 ; CHECK-NOT: uxt ; CHECK: cmp [[SUB]], #127 define i32 @safe_add_var_imm(i8* %b) { @@ -248,3 +248,33 @@ define i8 @convert_add_order(i8 zeroext %arg) { %res = select i1 %cmp.0, i8 %mask.sel, i8 %arg ret i8 %res } + +; CHECK-LABEL: underflow_if_sub +; CHECK: add{{.}} [[ADD:r[0-9]+]], #245 +; CHECK: cmp [[ADD]], r1 +define i8 @underflow_if_sub(i32 %arg, i8 zeroext %arg1) { + %cmp = icmp sgt i32 %arg, 0 + %conv = zext i1 %cmp to i32 + %and = and i32 %arg, %conv + %trunc = trunc i32 %and to i8 + %conv1 = add nuw nsw i8 %trunc, -11 + %cmp.1 = icmp ult i8 %conv1, %arg1 + %res = select i1 %cmp.1, i8 %conv1, i8 100 + ret i8 %res +} + +; CHECK-LABEL: underflow_if_sub_signext +; CHECK: uxtb [[UXT1:r[0-9]+]], r1 +; CHECK: sub{{.*}} [[SUB:r[0-9]+]], #11 +; CHECK: uxtb [[UXT_SUB:r[0-9]+]], [[SUB]] +; CHECK: cmp{{.*}}[[UXT_SUB]] +define i8 @underflow_if_sub_signext(i32 %arg, i8 signext %arg1) { + %cmp = icmp sgt i32 %arg, 0 + %conv = zext i1 %cmp to i32 + %and = and i32 %arg, %conv + %trunc = trunc i32 %and to i8 + %conv1 = add nuw nsw i8 %trunc, -11 + %cmp.1 = icmp ugt i8 %arg1, %conv1 + %res = select i1 %cmp.1, i8 %conv1, i8 100 + ret i8 %res +} -- 2.40.0