/// This method limits promotion of aggregates to at most MaxElements elements
/// of the aggregate (three by default) in order to avoid exploding the number
/// of arguments passed in.
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
- AAResults &AAR, unsigned MaxElements) {
+static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
+ unsigned MaxElements) {
using GEPIndicesSet = std::set<IndicesVector>;
// Quick exit for unused arguments
//
// This set will contain all sets of indices that are loaded in the entry
// block, and thus are safe to unconditionally load in the caller.
- //
- // This optimization is also safe for InAlloca parameters, because it verifies
- // that the address isn't captured.
GEPIndicesSet SafeToUnconditionallyLoad;
// This set contains all the sets of indices that we are planning to promote.
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
+ if (isByVal || allCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
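To see why only entry-block loads, or loads from pointers known to be valid, are safe to hoist, consider a minimal sketch (made-up names, not taken from the patch): the load in @guarded_load only executes when %take is true, so a caller-side load introduced by promotion could fault unless the byval guarantee or allCallersPassInValidPointerForArgument establishes that %p is always dereferenceable.
define internal i32 @guarded_load(i32* %p, i1 %take) {
entry:
  br i1 %take, label %yes, label %no
yes:                          ; the only load of %p is guarded by %take
  %v = load i32, i32* %p
  ret i32 %v
no:                           ; %p may legitimately be invalid on this path
  ret i32 0
}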
// First, iterate the entry block and mark loads of (geps of) arguments as safe.
// TODO: This runs the above loop over and over again for dead GEPs
// Couldn't we just increment the UI iterator earlier and erase the
// use?
- return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
- MaxElements);
+ return isSafeToPromoteArgument(Arg, isByVal, AAR, MaxElements);
}
// Ensure that all of the indices are constants.
if (F->isVarArg())
return nullptr;
+ // Don't transform functions that receive inallocas, as the transformation may
+ // not be safe depending on the calling convention.
+ if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ return nullptr;
+
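As a quick sketch of what the new bail-out protects against (hypothetical IR; %struct.widget and @takes_args are invented names, and the full regression test appears further below): with an x86_thiscallcc function the inalloca parameter carries the outgoing argument block in memory, so splitting it into promoted scalars would move data into positions the calling convention cannot accept, which is the PR41658 failure mode. With the check above, argpromotion leaves such a signature untouched.
%struct.widget = type { i8 }

define internal x86_thiscallcc void @takes_args(%struct.widget* %this, <{ %struct.widget }>* inalloca %args) {
entry:
  ret void
}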
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument *, 16> PointerArgs;
for (Argument &I : F->args())
// If this is a byval argument and the aggregate type is small, just
// pass the elements, which is always safe if the passed value is densely
- // packed or if we can prove the padding bytes are never accessed. This does
- // not apply to inalloca.
+ // packed or if we can prove the padding bytes are never accessed.
bool isSafeToPromote =
PtrArg->hasByValAttr() &&
(isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
}
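For intuition about the densely-packed test, here is an illustrative, assumption-labeled sketch (type and function names are invented; the padding count assumes the usual 4-byte i32 alignment): { i32, i32 } has no padding, so passing its elements by value loses nothing, whereas { i8, i32 } has three padding bytes after the i8 that must be proven unobservable (the !canPaddingBeAccessed leg) before promotion is allowed.
%packed = type { i32, i32 }   ; no padding: element-wise passing is always safe
%padded = type { i8, i32 }    ; three padding bytes after the i8

define internal i32 @second_field(%packed* byval %p) {
entry:
  %f = getelementptr %packed, %packed* %p, i32 0, i32 1
  %v = load i32, i32* %f
  ret i32 %v
}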
// Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr(), AAR,
MaxElements))
ArgsToPromote.insert(PtrArg);
}
}
}
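For context on what happens to an argument placed in ArgsToPromote, here is a hedged sketch of the transformation itself (names, including the %p.*.val values, are illustrative and may not match what the pass actually emits): the pointer parameter disappears, each loaded element becomes a scalar parameter, and the MaxElements cap described at the top of this section stops this from ballooning for larger aggregates.
%pair = type { i32, i32 }

define internal i32 @sum(%pair* %p) {
entry:
  %a.p = getelementptr %pair, %pair* %p, i32 0, i32 0
  %b.p = getelementptr %pair, %pair* %p, i32 0, i32 1
  %a = load i32, i32* %a.p
  %b = load i32, i32* %b.p
  %r = add i32 %a, %b
  ret i32 %r
}
; After promotion the definition becomes roughly
;   define internal i32 @sum(i32 %p.0.val, i32 %p.1.val)
; and each caller performs the two field loads itself before the call.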
-static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
- // There can be at most one attribute set with a nest attribute.
- unsigned NestIndex;
- if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
- return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
+static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
+ Attribute::AttrKind A) {
+ unsigned AttrIndex;
+ if (Attrs.hasAttrSomewhere(A, &AttrIndex))
+ return Attrs.removeAttribute(C, AttrIndex, A);
return Attrs;
}
-static void RemoveNestAttribute(Function *F) {
- F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
+static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
+ F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
CallSite CS(cast<Instruction>(U));
- CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
+ CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
}
}
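The loop over F->users() matters because the attribute lives both on the function and on every call site; a rough IR-level sketch of the original nest use, with invented names:
; Before RemoveAttribute(F, Attribute::Nest):
;   define internal void @callee(i8* nest %env, i32 %x) { ... }
;   call void @callee(i8* nest %e, i32 1)
; After, both the definition and each direct call drop the attribute:
;   define internal void @callee(i8* %env, i32 %x) { ... }
;   call void @callee(i8* %e, i32 1)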
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
return false;
- // Don't break the invariant that the inalloca parameter is the only parameter
- // passed in memory.
- // FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
- // alloca it uses to the entry block if possible.
- if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
- return false;
-
// FIXME: Change CC for the whole chain of musttail calls when possible.
//
// Can't change CC of the function that either has musttail calls, or is a
// musttail callee itself.
if (!F->hasLocalLinkage())
continue;
+ // If we have an inalloca parameter that we can safely remove the
+ // inalloca attribute from, do so. This unlocks optimizations that
+ // wouldn't be safe in the presence of inalloca.
+ // FIXME: We should also hoist allocas affected by this to the entry
+ // block if possible.
+ if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
+ !F->hasAddressTaken()) {
+ RemoveAttribute(F, Attribute::InAlloca);
+ Changed = true;
+ }
+
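A hedged before/after sketch of what this block enables (illustrative names; the updated inalloca.ll test below checks the real pipeline): once GlobalOpt knows the local function's address is never taken, it can drop inalloca from the definition and every direct call, and the hasChangeableCC path directly below may then also switch the function to fastcc, leaving argpromotion free to treat the parameter like any other pointer.
; Before globalopt:
;   define internal i32 @local(%struct.ss* inalloca %s) { ... }
;   call i32 @local(%struct.ss* inalloca %mem)
; After globalopt:
;   define internal fastcc i32 @local(%struct.ss* %s) { ... }
;   call fastcc i32 @local(%struct.ss* %mem)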
if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
NumInternalFunc++;
TargetTransformInfo &TTI = GetTTI(*F);
!F->hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
- RemoveNestAttribute(F);
+ RemoveAttribute(F, Attribute::Nest);
++NumNestRemoved;
Changed = true;
}
--- /dev/null
+; In PR41658, argpromotion put an inalloca in a position that per the
+; calling convention is passed in a register. This test verifies that
+; we don't do that anymore. It also verifies that the combination of
+; globalopt and argpromotion is able to optimize the call safely.
+;
+; RUN: opt -S -argpromotion %s | FileCheck --check-prefix=THIS %s
+; RUN: opt -S -globalopt -argpromotion %s | FileCheck --check-prefix=OPT %s
+; THIS: define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a
+; OPT: define internal fastcc void @internalfun(<{ %struct.a }>*)
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc19.11.0"
+
+%struct.a = type { i8 }
+
+define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) {
+entry:
+ %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0
+ %argmem = alloca inalloca <{ %struct.a }>, align 4
+ %1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0
+ %call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a)
+ call void @ext(<{ %struct.a }>* inalloca %argmem)
+ ret void
+}
+
+; This is here to ensure @internalfun is live.
+define void @exportedfun(%struct.a* %a) {
+ %inalloca.save = tail call i8* @llvm.stacksave()
+ %argmem = alloca inalloca <{ %struct.a }>, align 4
+ call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem)
+ call void @llvm.stackrestore(i8* %inalloca.save)
+ ret void
+}
+
+declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1))
+declare void @ext(<{ %struct.a }>* inalloca)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
-; RUN: opt %s -argpromotion -sroa -S | FileCheck %s
-; RUN: opt %s -passes='argpromotion,function(sroa)' -S | FileCheck %s
+; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s
+; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%r = add i32 %a, %b
ret i32 %r
}
-; CHECK-LABEL: define internal i32 @f
+; CHECK-LABEL: define internal fastcc i32 @f
; CHECK-NOT: load
; CHECK: ret
; Argpromote can't promote %a because of the icmp use.
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind {
-; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b)
+; CHECK: define internal fastcc i1 @g(%struct.ss* %a, %struct.ss* %b)
entry:
%c = icmp eq %struct.ss* %a, %b
ret i1 %c
entry:
%S = alloca inalloca %struct.ss
%c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
-; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
+; CHECK: call fastcc i1 @g(%struct.ss* %S, %struct.ss* %S)
ret i32 0
}
}
define internal i32 @inalloca(i32* inalloca %p) {
-; CHECK-LABEL: define internal i32 @inalloca(i32* inalloca %p)
+; CHECK-LABEL: define internal fastcc i32 @inalloca(i32* %p)
%rv = load i32, i32* %p
ret i32 %rv
}
; CHECK: call fastcc i32 @g
; CHECK: call coldcc i32 @h
; CHECK: call i32 @j
-; CHECK: call i32 @inalloca(i32* inalloca %args)
+; CHECK: call fastcc i32 @inalloca(i32* %args)