/// This method limits promotion of aggregates to at most MaxElements elements
/// of the aggregate (three by default) in order to avoid exploding the number
/// of arguments passed in.
-static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
- AAResults &AAR, unsigned MaxElements) {
+static bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, AAResults &AAR,
+ unsigned MaxElements) {
using GEPIndicesSet = std::set<IndicesVector>;
// Quick exit for unused arguments
//
// This set will contain all sets of indices that are loaded in the entry
// block, and thus are safe to unconditionally load in the caller.
- //
- // This optimization is also safe for InAlloca parameters, because it verifies
- // that the address isn't captured.
GEPIndicesSet SafeToUnconditionallyLoad;
// This set contains all the sets of indices that we are planning to promote.
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || allCallersPassInValidPointerForArgument(Arg))
+ if (isByVal || allCallersPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
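To see why only entry-block loads, or loads from pointers known to be valid, are safe to hoist, consider a minimal sketch (made-up names, not taken from the patch): the load in @guarded_load only executes when %take is true, so a caller-side load introduced by promotion could fault unless the byval guarantee or allCallersPassInValidPointerForArgument establishes that %p is always dereferenceable.
define internal i32 @guarded_load(i32* %p, i1 %take) {
entry:
  br i1 %take, label %yes, label %no
yes:                          ; the only load of %p is guarded by %take
  %v = load i32, i32* %p
  ret i32 %v
no:                           ; %p may legitimately be invalid on this path
  ret i32 0
}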
// First, iterate the entry block and mark loads of (geps of) arguments as safe.
// TODO: This runs the above loop over and over again for dead GEPs
// Couldn't we just increment the UI iterator earlier and erase the
// use?
- return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR,
- MaxElements);
+ return isSafeToPromoteArgument(Arg, isByVal, AAR, MaxElements);
}
// Ensure that all of the indices are constants.
if (F->isVarArg())
return nullptr;
+ // Don't transform functions that receive inallocas, as the transformation may
+ // not be safe depending on the calling convention.
+ if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+ return nullptr;
+
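As a quick sketch of what the new bail-out protects against (hypothetical IR; %struct.widget and @takes_args are invented names, and the full regression test appears further below): with an x86_thiscallcc function the inalloca parameter carries the outgoing argument block in memory, so splitting it into promoted scalars would move data into positions the calling convention cannot accept, which is the PR41658 failure mode. With the check above, argpromotion leaves such a signature untouched.
%struct.widget = type { i8 }

define internal x86_thiscallcc void @takes_args(%struct.widget* %this, <{ %struct.widget }>* inalloca %args) {
entry:
  ret void
}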
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument *, 16> PointerArgs;
for (Argument &I : F->args())
// If this is a byval argument and the aggregate type is small, just
// pass the elements, which is always safe if the passed value is densely
- // packed or if we can prove the padding bytes are never accessed. This does
- // not apply to inalloca.
+ // packed or if we can prove the padding bytes are never accessed.
bool isSafeToPromote =
PtrArg->hasByValAttr() &&
(isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
}
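For intuition about the densely-packed test, here is an illustrative, assumption-labeled sketch (type and function names are invented; the padding count assumes the usual 4-byte i32 alignment): { i32, i32 } has no padding, so passing its elements by value loses nothing, whereas { i8, i32 } has three padding bytes after the i8 that must be proven unobservable (the !canPaddingBeAccessed leg) before promotion is allowed.
%packed = type { i32, i32 }   ; no padding: element-wise passing is always safe
%padded = type { i8, i32 }    ; three padding bytes after the i8

define internal i32 @second_field(%packed* byval %p) {
entry:
  %f = getelementptr %packed, %packed* %p, i32 0, i32 1
  %v = load i32, i32* %f
  ret i32 %v
}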
// Otherwise, see if we can promote the pointer to its value.
- if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR,
+ if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr(), AAR,
MaxElements))
ArgsToPromote.insert(PtrArg);
}
}
}
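For context on what happens to an argument placed in ArgsToPromote, here is a hedged sketch of the transformation itself (names, including the %p.*.val values, are illustrative and may not match what the pass actually emits): the pointer parameter disappears, each loaded element becomes a scalar parameter, and the MaxElements cap described at the top of this section stops this from ballooning for larger aggregates.
%pair = type { i32, i32 }

define internal i32 @sum(%pair* %p) {
entry:
  %a.p = getelementptr %pair, %pair* %p, i32 0, i32 0
  %b.p = getelementptr %pair, %pair* %p, i32 0, i32 1
  %a = load i32, i32* %a.p
  %b = load i32, i32* %b.p
  %r = add i32 %a, %b
  ret i32 %r
}
; After promotion the definition becomes roughly
;   define internal i32 @sum(i32 %p.0.val, i32 %p.1.val)
; and each caller performs the two field loads itself before the call.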
-static AttributeList StripNest(LLVMContext &C, AttributeList Attrs) {
- // There can be at most one attribute set with a nest attribute.
- unsigned NestIndex;
- if (Attrs.hasAttrSomewhere(Attribute::Nest, &NestIndex))
- return Attrs.removeAttribute(C, NestIndex, Attribute::Nest);
+static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
+ Attribute::AttrKind A) {
+ unsigned AttrIndex;
+ if (Attrs.hasAttrSomewhere(A, &AttrIndex))
+ return Attrs.removeAttribute(C, AttrIndex, A);
return Attrs;
}
-static void RemoveNestAttribute(Function *F) {
- F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
+static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
+ F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
CallSite CS(cast<Instruction>(U));
- CS.setAttributes(StripNest(F->getContext(), CS.getAttributes()));
+ CS.setAttributes(StripAttr(F->getContext(), CS.getAttributes(), A));
}
}
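The loop over F->users() matters because the attribute lives both on the function and on every call site; a rough IR-level sketch of the original nest use, with invented names:
; Before RemoveAttribute(F, Attribute::Nest):
;   define internal void @callee(i8* nest %env, i32 %x) { ... }
;   call void @callee(i8* nest %e, i32 1)
; After, both the definition and each direct call drop the attribute:
;   define internal void @callee(i8* %env, i32 %x) { ... }
;   call void @callee(i8* %e, i32 1)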
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
return false;
- // Don't break the invariant that the inalloca parameter is the only parameter
- // passed in memory.
- // FIXME: GlobalOpt should remove inalloca when possible and hoist the dynamic
- // alloca it uses to the entry block if possible.
- if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
- return false;
-
// FIXME: Change CC for the whole chain of musttail calls when possible.
//
// Can't change CC of the function that either has musttail calls, or is a
// musttail callee itself.
if (!F->hasLocalLinkage())
continue;
+ // If we have an inalloca parameter that we can safely remove the
+ // inalloca attribute from, do so. This unlocks optimizations that
+ // wouldn't be safe in the presence of inalloca.
+ // FIXME: We should also hoist allocas affected by this to the entry
+ // block if possible.
+ if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
+ !F->hasAddressTaken()) {
+ RemoveAttribute(F, Attribute::InAlloca);
+ Changed = true;
+ }
+
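A hedged before/after sketch of what this block enables (illustrative names; the updated inalloca.ll test below checks the real pipeline): once GlobalOpt knows the local function's address is never taken, it can drop inalloca from the definition and every direct call, and the hasChangeableCC path directly below may then also switch the function to fastcc, leaving argpromotion free to treat the parameter like any other pointer.
; Before globalopt:
;   define internal i32 @local(%struct.ss* inalloca %s) { ... }
;   call i32 @local(%struct.ss* inalloca %mem)
; After globalopt:
;   define internal fastcc i32 @local(%struct.ss* %s) { ... }
;   call fastcc i32 @local(%struct.ss* %mem)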
if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
NumInternalFunc++;
TargetTransformInfo &TTI = GetTTI(*F);
!F->hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
- RemoveNestAttribute(F);
+ RemoveAttribute(F, Attribute::Nest);
++NumNestRemoved;
Changed = true;
}
--- /dev/null
+; In PR41658, argpromotion put an inalloca in a position that per the
+; calling convention is passed in a register. This test verifies that
+; we don't do that anymore. It also verifies that the combination of
+; globalopt and argpromotion is able to optimize the call safely.
+;
+; RUN: opt -S -argpromotion %s | FileCheck --check-prefix=THIS %s
+; RUN: opt -S -globalopt -argpromotion %s | FileCheck --check-prefix=OPT %s
+; THIS: define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a
+; OPT: define internal fastcc void @internalfun(<{ %struct.a }>*)
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i386-pc-windows-msvc19.11.0"
+
+%struct.a = type { i8 }
+
+define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) {
+entry:
+ %a = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %0, i32 0, i32 0
+ %argmem = alloca inalloca <{ %struct.a }>, align 4
+ %1 = getelementptr inbounds <{ %struct.a }>, <{ %struct.a }>* %argmem, i32 0, i32 0
+ %call = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* %1, %struct.a* dereferenceable(1) %a)
+ call void @ext(<{ %struct.a }>* inalloca %argmem)
+ ret void
+}
+
+; This is here to ensure @internalfun is live.
+define void @exportedfun(%struct.a* %a) {
+ %inalloca.save = tail call i8* @llvm.stacksave()
+ %argmem = alloca inalloca <{ %struct.a }>, align 4
+ call x86_thiscallcc void @internalfun(%struct.a* %a, <{ %struct.a }>* inalloca %argmem)
+ call void @llvm.stackrestore(i8* %inalloca.save)
+ ret void
+}
+
+declare x86_thiscallcc %struct.a* @copy_ctor(%struct.a* returned, %struct.a* dereferenceable(1))
+declare void @ext(<{ %struct.a }>* inalloca)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
-; RUN: opt %s -argpromotion -sroa -S | FileCheck %s
-; RUN: opt %s -passes='argpromotion,function(sroa)' -S | FileCheck %s
+; RUN: opt %s -globalopt -argpromotion -sroa -S | FileCheck %s
+; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%r = add i32 %a, %b
ret i32 %r
}
-; CHECK-LABEL: define internal i32 @f
+; CHECK-LABEL: define internal fastcc i32 @f
; CHECK-NOT: load
; CHECK: ret
; Argpromote can't promote %a because of the icmp use.
define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind {
-; CHECK: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b)
+; CHECK: define internal fastcc i1 @g(%struct.ss* %a, %struct.ss* %b)
entry:
%c = icmp eq %struct.ss* %a, %b
ret i1 %c
entry:
%S = alloca inalloca %struct.ss
%c = call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
-; CHECK: call i1 @g(%struct.ss* %S, %struct.ss* inalloca %S)
+; CHECK: call fastcc i1 @g(%struct.ss* %S, %struct.ss* %S)
ret i32 0
}
}
define internal i32 @inalloca(i32* inalloca %p) {
-; CHECK-LABEL: define internal i32 @inalloca(i32* inalloca %p)
+; CHECK-LABEL: define internal fastcc i32 @inalloca(i32* %p)
%rv = load i32, i32* %p
ret i32 %rv
}
; CHECK: call fastcc i32 @g
; CHECK: call coldcc i32 @h
; CHECK: call i32 @j
-; CHECK: call i32 @inalloca(i32* inalloca %args)
+; CHECK: call fastcc i32 @inalloca(i32* %args)