return Objects[ObjectIdx+NumFixedObjects].isAliased;
}
- /// isImmutableObjectIndex - Returns true if the specified index corresponds
- /// to an immutable object.
+ /// Returns true if the specified index corresponds to an immutable object.
bool isImmutableObjectIndex(int ObjectIdx) const {
// Tail calling functions can clobber their function arguments.
if (HasTailCall)
return false;
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
return Objects[ObjectIdx+NumFixedObjects].isImmutable;
}
+ /// Marks the immutability of an object.
+ void setIsImmutableObjectIndex(int ObjectIdx, bool Immutable) {
+ assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+ "Invalid Object Idx!");
+ Objects[ObjectIdx+NumFixedObjects].isImmutable = Immutable;
+ }
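The new setter lets argument copy elision flip an incoming argument slot from read-only to writable. A minimal usage sketch, assuming MFI, OldIndex and FixedIndex stand for the values used in tryToElideArgumentCopy later in this patch:

  // Redirect the alloca to the fixed stack object that already holds the
  // incoming argument, then mark that object mutable because the function
  // may now store to the argument's memory through the old alloca uses.
  MFI.RemoveStackObject(OldIndex);                   // slot created for the alloca
  MFI.setIsImmutableObjectIndex(FixedIndex, false);  // argument slot is writable now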
+
/// Returns true if the specified index corresponds to a spill slot.
bool isSpillSlotObjectIndex(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
const TargetInstrInfo *TII;
const TargetLowering *TLI;
bool FastISelFailed;
+ SmallPtrSet<const Instruction *, 4> ElidedArgCopyInstrs;
static char ID;
unsigned OrigAlign : 5; ///< Log 2 of original alignment
unsigned IsInConsecutiveRegsLast : 1;
unsigned IsInConsecutiveRegs : 1;
+ unsigned IsCopyElisionCandidate : 1; ///< Argument copy elision candidate
unsigned ByValSize; ///< Byval struct size
IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
IsSwiftSelf(0), IsSwiftError(0), IsHva(0), IsHvaStart(0),
IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
- IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), ByValSize(0) {
+ IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
+ IsCopyElisionCandidate(0), ByValSize(0) {
static_assert(sizeof(*this) == 2 * sizeof(unsigned), "flags are too big");
}
bool isSplitEnd() const { return IsSplitEnd; }
void setSplitEnd() { IsSplitEnd = 1; }
+ bool isCopyElisionCandidate() const { return IsCopyElisionCandidate; }
+ void setCopyElisionCandidate() { IsCopyElisionCandidate = 1; }
+
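These accessors carry a one-bit handshake between target-independent ISel and the target's argument lowering. A rough sketch of the two sides, using names that appear further down in this patch:

  // Target-independent side (SelectionDAGISel::LowerArguments):
  if (ArgCopyElisionCandidates.count(&Arg))
    Flags.setCopyElisionCandidate();
  // Target side (e.g. the X86 in-memory argument lowering):
  if (Ins[i].Flags.isCopyElisionCandidate())
    ; // try to return a load from the argument's fixed stack object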
unsigned getByValAlign() const { return (1U << ByValAlign) / 2; }
void setByValAlign(unsigned A) {
ByValAlign = Log2_32(A) + 1;
assert(!MInsn && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
- assert(~FI && "Expected valid index");
+ assert(FI != INT_MAX && "Expected valid index");
FrameIndexExprs.push_back({FI, E});
}
return true;
}
+typedef DenseMap<const Argument *,
+ std::pair<const AllocaInst *, const StoreInst *>>
+ ArgCopyElisionMapTy;
+
+/// Scan the entry block of the function in FuncInfo for arguments that look
+/// like copies into a local alloca. Record any copied arguments in
+/// ArgCopyElisionCandidates.
+static void
+findArgumentCopyElisionCandidates(const DataLayout &DL,
+ FunctionLoweringInfo *FuncInfo,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
+ // Record the state of every static alloca used in the entry block. Argument
+ // allocas are all used in the entry block, so we need approximately as many
+ // entries as we have arguments.
+ enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
+ SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
+ unsigned NumArgs = FuncInfo->Fn->getArgumentList().size();
+ StaticAllocas.reserve(NumArgs * 2);
+
+ auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
+ if (!V)
+ return nullptr;
+ V = V->stripPointerCasts();
+ const auto *AI = dyn_cast<AllocaInst>(V);
+ if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
+ return nullptr;
+ auto Iter = StaticAllocas.insert({AI, Unknown});
+ return &Iter.first->second;
+ };
+
+ // Look for stores of arguments to static allocas. Look through bitcasts and
+ // GEPs to handle type coercions, as long as the alloca is fully initialized
+ // by the store. Any non-store use of an alloca escapes it and any subsequent
+ // unanalyzed store might write it.
+ // FIXME: Handle structs initialized with multiple stores.
+ for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
+ // Look for stores, and handle non-store uses conservatively.
+ const auto *SI = dyn_cast<StoreInst>(&I);
+ if (!SI) {
+ // We will look through cast uses, so ignore them completely.
+ if (I.isCast())
+ continue;
+ // Ignore debug info intrinsics, they don't escape or store to allocas.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ // This is an unknown instruction. Assume it escapes or writes to all
+ // static alloca operands.
+ for (const Use &U : I.operands()) {
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
+ *Info = StaticAllocaInfo::Clobbered;
+ }
+ continue;
+ }
+
+ // If the stored value is a static alloca, mark it as escaped.
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
+ *Info = StaticAllocaInfo::Clobbered;
+
+ // Check if the destination is a static alloca.
+ const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
+ StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
+ if (!Info)
+ continue;
+ const AllocaInst *AI = cast<AllocaInst>(Dst);
+
+ // Skip allocas that have been initialized or clobbered.
+ if (*Info != StaticAllocaInfo::Unknown)
+ continue;
+
+ // Check if the stored value is an argument, and that this store fully
+ // initializes the alloca. Don't elide copies from the same argument twice.
+ const Value *Val = SI->getValueOperand()->stripPointerCasts();
+ const auto *Arg = dyn_cast<Argument>(Val);
+ if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+ Arg->getType()->isEmptyTy() ||
+ DL.getTypeStoreSize(Arg->getType()) !=
+ DL.getTypeAllocSize(AI->getAllocatedType()) ||
+ ArgCopyElisionCandidates.count(Arg)) {
+ *Info = StaticAllocaInfo::Clobbered;
+ continue;
+ }
+
+ DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n');
+
+ // Mark this alloca and store for argument copy elision.
+ *Info = StaticAllocaInfo::Elidable;
+ ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
+
+ // Stop scanning if we've seen all arguments. This will happen early in -O0
+ // builds, which is useful, because -O0 builds have large entry blocks and
+ // many allocas.
+ if (ArgCopyElisionCandidates.size() == NumArgs)
+ break;
+ }
+}
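For reference, a hedged sketch of how the resulting map is consumed; this mirrors the LowerArguments changes below, with DL and FuncInfo assumed to be in scope:

  ArgCopyElisionMapTy ArgCopyElisionCandidates;
  findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
  for (const auto &KV : ArgCopyElisionCandidates) {
    const Argument *Arg = KV.first;          // incoming argument
    const AllocaInst *AI = KV.second.first;  // alloca it is copied into
    const StoreInst *SI = KV.second.second;  // the single initializing store
    (void)Arg; (void)AI; (void)SI;
  }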
+
+/// Try to elide argument copies from memory into a local alloca. Succeeds if
+/// ArgVal is a load from a suitable fixed stack object.
+static void tryToElideArgumentCopy(
+ FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
+ DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
+ SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
+ SDValue ArgVal, bool &ArgHasUses) {
+ // Check if this is a load from a fixed stack object.
+ auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
+ if (!LNode)
+ return;
+ auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
+ if (!FINode)
+ return;
+
+ // Check that the fixed stack object is the right size and alignment.
+ // Look at the alignment that the user wrote on the alloca instead of looking
+ // at the stack object.
+ auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
+ assert(ArgCopyIter != ArgCopyElisionCandidates.end());
+ const AllocaInst *AI = ArgCopyIter->second.first;
+ int FixedIndex = FINode->getIndex();
+ int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+ int OldIndex = AllocaIndex;
+ MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+ if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
+ DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack "
+ "object size\n");
+ return;
+ }
+ unsigned RequiredAlignment = AI->getAlignment();
+ if (!RequiredAlignment) {
+ RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+ AI->getAllocatedType());
+ }
+ if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+ DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
+ "greater than stack argument alignment ("
+ << RequiredAlignment << " vs "
+ << MFI.getObjectAlignment(FixedIndex) << ")\n");
+ return;
+ }
+
+ // Perform the elision. Delete the old stack object and replace its only use
+ // in the variable info map. Mark the stack object as mutable.
+ DEBUG({
+ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
+ << " Replacing frame index " << OldIndex << " with " << FixedIndex
+ << '\n';
+ });
+ MFI.RemoveStackObject(OldIndex);
+ MFI.setIsImmutableObjectIndex(FixedIndex, false);
+ AllocaIndex = FixedIndex;
+ ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
+ Chains.push_back(ArgVal.getValue(1));
+
+ // Avoid emitting code for the store implementing the copy.
+ const StoreInst *SI = ArgCopyIter->second.second;
+ ElidedArgCopyInstrs.insert(SI);
+
+ // Check for uses of the argument again so that we can avoid exporting ArgVal
+ // if it isn't used by anything other than the store.
+ for (const Value *U : Arg.users()) {
+ if (U != SI) {
+ ArgHasUses = true;
+ break;
+ }
+ }
+}
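One concrete rejection case, matching the @high_alignment test added below (the numbers are illustrative, not taken from the code above):

  unsigned RequiredAlignment = 128;                    // from "alloca i32, align 128"
  unsigned SlotAlignment = 4;                          // alignment of the fixed argument slot
  bool CanElide = SlotAlignment >= RequiredAlignment;  // false -> keep the copying store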
+
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
Ins.push_back(RetArg);
}
+ // Look for stores of arguments to static allocas. Mark such arguments with a
+ // flag to ask the target to give us the memory location of that argument if
+ // available.
+ ArgCopyElisionMapTy ArgCopyElisionCandidates;
+ findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+
// Set up the incoming argument description vector.
unsigned Idx = 0;
for (const Argument &Arg : F.args()) {
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
+ if (ArgCopyElisionCandidates.count(&Arg))
+ Flags.setCopyElisionCandidate();
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
++i;
}
+ SmallVector<SDValue, 4> Chains;
+ DenseMap<int, int> ArgCopyElisionFrameIndexMap;
for (const Argument &Arg : F.args()) {
++Idx;
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ continue;
+
+ bool ArgHasUses = !Arg.use_empty();
+
+ // Elide the copying store if the target loaded this argument from a
+ // suitable fixed stack object.
+ if (Ins[i].Flags.isCopyElisionCandidate()) {
+ tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+ ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
+ InVals[i], ArgHasUses);
+ }
// If this argument is unused then remember its value. It is used to generate
// debugging information.
bool isSwiftErrorArg =
TLI->supportSwiftError() &&
F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
- if (Arg.use_empty() && NumValues && !isSwiftErrorArg) {
+ if (!ArgHasUses && !isSwiftErrorArg) {
SDB->setUnusedArgValue(&Arg, InVals[i]);
// Also remember any frame index for use in FastISel.
// Even an apparent 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
// function.
- if (!Arg.use_empty() || isSwiftErrorArg) {
+ if (ArgHasUses || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
AssertOp = ISD::AssertZext;
- ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
- NumParts, PartVT, VT,
- nullptr, AssertOp));
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+ PartVT, VT, nullptr, AssertOp));
}
i += NumParts;
}
}
+ if (!Chains.empty()) {
+ Chains.push_back(NewRoot);
+ NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ }
+
+ DAG.setRoot(NewRoot);
+
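The loads that stand in for elided copies carry their own chains; tying them into the root with a TokenFactor keeps later stores (such as the sret store in the sret_and_elide test below) ordered after them. A minimal sketch, assuming two such loads Load0 and Load1 plus the surrounding DAG and dl:

  SmallVector<SDValue, 4> Chains = {Load0.getValue(1), Load1.getValue(1)};
  Chains.push_back(DAG.getRoot());
  SDValue NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
  DAG.setRoot(NewRoot);  // later nodes chain onto NewRoot, after both loads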
assert(i == InVals.size() && "Argument register count mismatch!");
+ // If any argument copy elisions occurred and we have debug info, update the
+ // stale frame indices used in the dbg.declare variable info table.
+ MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
+ if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
+ for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
+ auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+ if (I != ArgCopyElisionFrameIndexMap.end())
+ VI.Slot = I->second;
+ }
+ }
+
// Finally, if the target has anything special to do, allow it to do so.
EmitFunctionEntryCode();
}
bool &HadTailCall) {
// Lower the instructions. If a call is emitted as a tail call, cease emitting
// nodes for this block.
- for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
- SDB->visit(*I);
+ for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
+ if (!ElidedArgCopyInstrs.count(&*I))
+ SDB->visit(*I);
+ }
// Make sure the root of the DAG is up-to-date.
CurDAG->setRoot(SDB->getControlRoot());
const Instruction *Inst = &*std::prev(BI);
// If we no longer require this instruction, skip it.
- if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo) ||
+ ElidedArgCopyInstrs.count(Inst)) {
--NumFastIselRemaining;
continue;
}
FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
+ ElidedArgCopyInstrs.clear();
}
propagateSwiftErrorVRegs(FuncInfo);
CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
+ MVT PtrVT = getPointerTy(DAG.getDataLayout());
// If value is passed by pointer we have address passed instead of the value
// itself. No need to extend if the mask value and location share the same
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}
- return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- } else {
- int FI = MFI.CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), isImmutable);
-
- // Set SExt or ZExt flag.
- if (VA.getLocInfo() == CCValAssign::ZExt) {
- MFI.setObjectZExt(FI, true);
- } else if (VA.getLocInfo() == CCValAssign::SExt) {
- MFI.setObjectSExt(FI, true);
+ return DAG.getFrameIndex(FI, PtrVT);
+ }
+
+ // This is an argument in memory. We might be able to perform copy elision.
+ if (Flags.isCopyElisionCandidate()) {
+ EVT ArgVT = Ins[i].ArgVT;
+ SDValue PartAddr;
+ if (Ins[i].PartOffset == 0) {
+ // If this is a one-part value or the first part of a multi-part value,
+ // create a stack object for the entire argument value type and return a
+ // load from our portion of it. This assumes that if the first part of an
+ // argument is in memory, the rest will also be in memory.
+ int FI = MFI.CreateFixedObject(ArgVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), /*Immutable=*/false);
+ PartAddr = DAG.getFrameIndex(FI, PtrVT);
+ return DAG.getLoad(
+ ValVT, dl, Chain, PartAddr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ } else {
+ // This is not the first piece of an argument in memory. See if there is
+ // already a fixed stack object including this offset. If so, assume it
+ // was created by the PartOffset == 0 branch above and create a load from
+ // the appropriate offset into it.
+ int64_t PartBegin = VA.getLocMemOffset();
+ int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
+ int FI = MFI.getObjectIndexBegin();
+ for (; MFI.isFixedObjectIndex(FI); ++FI) {
+ int64_t ObjBegin = MFI.getObjectOffset(FI);
+ int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
+ if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
+ break;
+ }
+ if (MFI.isFixedObjectIndex(FI)) {
+ SDValue Addr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
+ DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
+ return DAG.getLoad(
+ ValVT, dl, Chain, Addr,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
+ Ins[i].PartOffset));
+ }
}
+ }
- // Adjust SP offset of interrupt parameter.
- if (CallConv == CallingConv::X86_INTR) {
- MFI.setObjectOffset(FI, Offset);
- }
+ int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), isImmutable);
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue Val = DAG.getLoad(
- ValVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- return ExtendedInMem ?
- DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
+ // Set SExt or ZExt flag.
+ if (VA.getLocInfo() == CCValAssign::ZExt) {
+ MFI.setObjectZExt(FI, true);
+ } else if (VA.getLocInfo() == CCValAssign::SExt) {
+ MFI.setObjectSExt(FI, true);
}
+
+ // Adjust SP offset of interrupt parameter.
+ if (CallConv == CallingConv::X86_INTR) {
+ MFI.setObjectOffset(FI, Offset);
+ }
+
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getLoad(
+ ValVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+ return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)
+ : Val;
}
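A worked illustration of the PartOffset != 0 search above, corresponding to the split_i64 tests added below (offsets are illustrative): the first half of an i64 creates one 8-byte fixed object, and the second half then finds that object and reuses it with an ADD of its part offset rather than creating a second, overlapping object.

  int64_t ObjBegin = 0, ObjEnd = ObjBegin + 8;     // fixed object made for the i64's first part
  int64_t PartBegin = 4, PartEnd = PartBegin + 4;  // second 32-bit piece of the same i64
  bool Contained = ObjBegin <= PartBegin && PartEnd <= ObjEnd;  // true -> reuse FI, add PartOffset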
// FIXME: Get this from tablegen.
; rdar://13625505
; Here we have 9 fixed integer arguments; the 9th argument is on the stack, and
; the varargs start right after at 8-byte alignment.
-define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
+define void @fn9(i32* %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
; CHECK-LABEL: fn9:
; 9th fixed argument
; CHECK: ldr {{w[0-9]+}}, [sp, #64]
%a10 = alloca i32, align 4
%a11 = alloca i32, align 4
%a12 = alloca i32, align 4
- store i32 %a1, i32* %1, align 4
store i32 %a2, i32* %2, align 4
store i32 %a3, i32* %3, align 4
store i32 %a4, i32* %4, align 4
store i32 %a7, i32* %7, align 4
store i32 %a8, i32* %8, align 4
store i32 %a9, i32* %9, align 4
+ store i32 %a9, i32* %a1
%10 = bitcast i8** %args to i8*
call void @llvm.va_start(i8* %10)
%11 = va_arg i8** %args, i32
%10 = load i32, i32* %a10, align 4
%11 = load i32, i32* %a11, align 4
%12 = load i32, i32* %a12, align 4
- call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
+ call void (i32*, i32, i32, i32, i32, i32, i32, i32, i32, ...) @fn9(i32* %a1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
ret i32 0
}
--- /dev/null
+; RUN: llc -mtriple=armv7-linux < %s | FileCheck %s
+
+declare arm_aapcscc void @addrof_i32(i32*)
+declare arm_aapcscc void @addrof_i64(i64*)
+
+define arm_aapcscc void @simple(i32, i32, i32, i32, i32 %x) {
+entry:
+ %x.addr = alloca i32
+ store i32 %x, i32* %x.addr
+ call void @addrof_i32(i32* %x.addr)
+ ret void
+}
+
+; CHECK-LABEL: simple:
+; CHECK: push {r11, lr}
+; CHECK: add r0, sp, #8
+; CHECK: bl addrof_i32
+; CHECK: pop {r11, pc}
+
+
+; We need to load %x before calling addrof_i32 now because it could mutate %x in
+; place.
+
+define arm_aapcscc i32 @use_arg(i32, i32, i32, i32, i32 %x) {
+entry:
+ %x.addr = alloca i32
+ store i32 %x, i32* %x.addr
+ call void @addrof_i32(i32* %x.addr)
+ ret i32 %x
+}
+
+; CHECK-LABEL: use_arg:
+; CHECK: push {[[csr:[^ ]*]], lr}
+; CHECK: ldr [[csr]], [sp, #8]
+; CHECK: add r0, sp, #8
+; CHECK: bl addrof_i32
+; CHECK: mov r0, [[csr]]
+; CHECK: pop {[[csr]], pc}
+
+
+define arm_aapcscc i64 @split_i64(i32, i32, i32, i32, i64 %x) {
+entry:
+ %x.addr = alloca i64, align 4
+ store i64 %x, i64* %x.addr, align 4
+ call void @addrof_i64(i64* %x.addr)
+ ret i64 %x
+}
+
+; CHECK-LABEL: split_i64:
+; CHECK: push {r4, r5, r11, lr}
+; CHECK: sub sp, sp, #8
+; CHECK: ldr r4, [sp, #28]
+; CHECK: ldr r5, [sp, #24]
+; CHECK: mov r0, sp
+; CHECK: str r4, [sp, #4]
+; CHECK: str r5, [sp]
+; CHECK: bl addrof_i64
+; CHECK: mov r0, r5
+; CHECK: mov r1, r4
+; CHECK: add sp, sp, #8
+; CHECK: pop {r4, r5, r11, pc}
ret i32 %tmp
; CHECK-LABEL: va9:
-; CHECK: addiu $sp, $sp, -32
-; CHECK: lw $2, 52($sp)
+; CHECK: addiu $sp, $sp, -24
+; CHECK: lw $2, 44($sp)
}
; double
@.str = internal constant [4 x i8] c"%p\0A\00" ; <[4 x i8]*> [#uses=1]
@llvm.used = appending global [1 x i8*] [i8* bitcast (i8* (%struct.S*, i32, %struct.S*)* @_Z4test1SiS_ to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-; Verify that %esi gets spilled before the call.
+; Verify that %s1 gets spilled before the call.
; CHECK: Z4test1SiS
-; CHECK: movl %esi,{{.*}}(%ebp)
+; CHECK: leal 8(%ebp), %[[reg:[^ ]*]]
+; CHECK: movl %[[reg]],{{.*}}(%ebp) ## 4-byte Spill
; CHECK: calll __Z6throwsv
define i8* @_Z4test1SiS_(%struct.S* byval %s1, i32 %n, %struct.S* byval %s2) ssp personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
--- /dev/null
+; RUN: llc -mtriple=i686-windows < %s | FileCheck %s
+
+declare void @addrof_i32(i32*)
+declare void @addrof_i64(i64*)
+declare void @addrof_i128(i128*)
+declare void @addrof_i32_x3(i32*, i32*, i32*)
+
+define void @simple(i32 %x) {
+entry:
+ %x.addr = alloca i32
+ store i32 %x, i32* %x.addr
+ call void @addrof_i32(i32* %x.addr)
+ ret void
+}
+
+; CHECK-LABEL: _simple:
+; CHECK: leal 4(%esp), %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i32
+; CHECK: retl
+
+
+; We need to load %x before calling addrof_i32 now because it could mutate %x in
+; place.
+
+define i32 @use_arg(i32 %x) {
+entry:
+ %x.addr = alloca i32
+ store i32 %x, i32* %x.addr
+ call void @addrof_i32(i32* %x.addr)
+ ret i32 %x
+}
+
+; CHECK-LABEL: _use_arg:
+; CHECK: pushl %[[csr:[^ ]*]]
+; CHECK-DAG: movl 8(%esp), %[[csr]]
+; CHECK-DAG: leal 8(%esp), %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i32
+; CHECK: movl %[[csr]], %eax
+; CHECK: popl %[[csr]]
+; CHECK: retl
+
+
+define i64 @split_i64(i64 %x) {
+entry:
+ %x.addr = alloca i64, align 4
+ store i64 %x, i64* %x.addr, align 4
+ call void @addrof_i64(i64* %x.addr)
+ ret i64 %x
+}
+
+; CHECK-LABEL: _split_i64:
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
+; CHECK: pushl %[[csr2:[^ ]*]]
+; CHECK: pushl %[[csr1:[^ ]*]]
+; CHECK: andl $-8, %esp
+; CHECK-DAG: movl 8(%ebp), %[[csr1]]
+; CHECK-DAG: movl 12(%ebp), %[[csr2]]
+; CHECK-DAG: leal 8(%ebp), %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i64
+; CHECK-DAG: movl %[[csr1]], %eax
+; CHECK-DAG: movl %[[csr2]], %edx
+; CHECK: leal -8(%ebp), %esp
+; CHECK: popl %[[csr1]]
+; CHECK: popl %[[csr2]]
+; CHECK: popl %ebp
+; CHECK: retl
+
+
+; We can't copy elide when an i64 is split between registers and memory in a
+; fastcc function.
+
+define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) {
+entry:
+ %x.addr = alloca i64, align 4
+ store i64 %x, i64* %x.addr, align 4
+ call void @addrof_i64(i64* %x.addr)
+ ret i64 %x
+}
+
+; CHECK-LABEL: _fastcc_split_i64:
+; CHECK: pushl %ebp
+; CHECK: movl %esp, %ebp
+; CHECK-DAG: movl %edx, %[[r1:[^ ]*]]
+; CHECK-DAG: movl 8(%ebp), %[[r2:[^ ]*]]
+; CHECK-DAG: movl %[[r2]], 4(%esp)
+; CHECK-DAG: movl %[[r1]], (%esp)
+; CHECK: movl %esp, %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i64
+; CHECK: popl %ebp
+; CHECK: retl
+
+
+; We can't copy elide when it would reduce the user requested alignment.
+
+define void @high_alignment(i32 %x) {
+entry:
+ %x.p = alloca i32, align 128
+ store i32 %x, i32* %x.p
+ call void @addrof_i32(i32* %x.p)
+ ret void
+}
+
+; CHECK-LABEL: _high_alignment:
+; CHECK: andl $-128, %esp
+; CHECK: movl 8(%ebp), %[[reg:[^ ]*]]
+; CHECK: movl %[[reg]], (%esp)
+; CHECK: movl %esp, %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i32
+; CHECK: retl
+
+
+; We can't copy elide when it would reduce the ABI required alignment.
+; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC
+; doesn't guarantee it.
+
+define void @abi_alignment(i64 %x) {
+entry:
+ %x.p = alloca i64
+ store i64 %x, i64* %x.p
+ call void @addrof_i64(i64* %x.p)
+ ret void
+}
+
+; CHECK-LABEL: _abi_alignment:
+; CHECK: andl $-8, %esp
+; CHECK: movl 8(%ebp), %[[reg:[^ ]*]]
+; CHECK: movl %[[reg]], (%esp)
+; CHECK: movl %esp, %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i64
+; CHECK: retl
+
+
+; The code we generate for this is unimportant. This is mostly a crash test.
+
+define void @split_i128(i128* %sret, i128 %x) {
+entry:
+ %x.addr = alloca i128
+ store i128 %x, i128* %x.addr
+ call void @addrof_i128(i128* %x.addr)
+ store i128 %x, i128* %sret
+ ret void
+}
+
+; CHECK-LABEL: _split_i128:
+; CHECK: pushl %ebp
+; CHECK: calll _addrof_i128
+; CHECK: retl
+
+
+; Check that we load all of x, y, and z before the call.
+
+define i32 @three_args(i32 %x, i32 %y, i32 %z) {
+entry:
+ %z.addr = alloca i32, align 4
+ %y.addr = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %z, i32* %z.addr, align 4
+ store i32 %y, i32* %y.addr, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr)
+ %s1 = add i32 %x, %y
+ %sum = add i32 %s1, %z
+ ret i32 %sum
+}
+
+; CHECK-LABEL: _three_args:
+; CHECK: pushl %[[csr:[^ ]*]]
+; CHECK-DAG: movl {{[0-9]+}}(%esp), %[[csr]]
+; CHECK-DAG: addl {{[0-9]+}}(%esp), %[[csr]]
+; CHECK-DAG: addl {{[0-9]+}}(%esp), %[[csr]]
+; CHECK-DAG: leal 8(%esp), %[[x:[^ ]*]]
+; CHECK-DAG: leal 12(%esp), %[[y:[^ ]*]]
+; CHECK-DAG: leal 16(%esp), %[[z:[^ ]*]]
+; CHECK: pushl %[[z]]
+; CHECK: pushl %[[y]]
+; CHECK: pushl %[[x]]
+; CHECK: calll _addrof_i32_x3
+; CHECK: movl %[[csr]], %eax
+; CHECK: popl %[[csr]]
+; CHECK: retl
+
+
+define void @two_args_same_alloca(i32 %x, i32 %y) {
+entry:
+ %x.addr = alloca i32
+ store i32 %x, i32* %x.addr
+ store i32 %y, i32* %x.addr
+ call void @addrof_i32(i32* %x.addr)
+ ret void
+}
+
+; CHECK-LABEL: _two_args_same_alloca:
+; CHECK: movl 8(%esp), {{.*}}
+; CHECK: movl {{.*}}, 4(%esp)
+; CHECK: leal 4(%esp), %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i32
+; CHECK: retl
+
+
+define void @avoid_byval(i32* byval %x) {
+entry:
+ %x.p.p = alloca i32*
+ store i32* %x, i32** %x.p.p
+ call void @addrof_i32(i32* %x)
+ ret void
+}
+
+; CHECK-LABEL: _avoid_byval:
+; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i32
+; CHECK: retl
+
+
+define void @avoid_inalloca(i32* inalloca %x) {
+entry:
+ %x.p.p = alloca i32*
+ store i32* %x, i32** %x.p.p
+ call void @addrof_i32(i32* %x)
+ ret void
+}
+
+; CHECK-LABEL: _avoid_inalloca:
+; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i32
+; CHECK: retl
+
+
+; Don't elide the copy when the alloca is escaped with a store.
+
+define void @escape_with_store(i32 %x) {
+ %x1 = alloca i32
+ %x2 = alloca i32*
+ store i32* %x1, i32** %x2
+ %x3 = load i32*, i32** %x2
+ store i32 0, i32* %x3
+ store i32 %x, i32* %x1
+ call void @addrof_i32(i32* %x1)
+ ret void
+}
+
+; CHECK-LABEL: _escape_with_store:
+; CHECK-DAG: movl {{.*}}(%esp), %[[reg:[^ ]*]]
+; CHECK-DAG: movl $0, [[offs:[0-9]*]](%esp)
+; CHECK: movl %[[reg]], [[offs]](%esp)
+; CHECK: calll _addrof_i32
+
+
+; This test case exposed issues with the use of TokenFactor.
+
+define void @sret_and_elide(i32* sret %sret, i32 %v) {
+ %v.p = alloca i32
+ store i32 %v, i32* %v.p
+ call void @addrof_i32(i32* %v.p)
+ store i32 %v, i32* %sret
+ ret void
+}
+
+; CHECK-LABEL: _sret_and_elide:
+; CHECK: pushl
+; CHECK: pushl
+; CHECK: movl 12(%esp), %[[sret:[^ ]*]]
+; CHECK: movl 16(%esp), %[[v:[^ ]*]]
+; CHECK: leal 16(%esp), %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i32
+; CHECK: movl %[[v]], (%[[sret]])
+; CHECK: movl %[[sret]], %eax
+; CHECK: popl
+; CHECK: popl
+; CHECK: retl
; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -optimize-regalloc -regalloc=basic -no-integrated-as | FileCheck %s
; rdar://6992609
-; CHECK: movl %ecx, 4([[ESP:%e..]])
-; CHECK: movl 4([[ESP]]), [[EDX:%e..]]
-; CHECK: movl [[EDX]], 4([[ESP]])
target triple = "i386-apple-darwin9.0"
-@llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
define i64 @_OSSwapInt64(i64 %_data) nounwind {
entry:
- %retval = alloca i64 ; <i64*> [#uses=2]
- %_data.addr = alloca i64 ; <i64*> [#uses=4]
- store i64 %_data, i64* %_data.addr
- %tmp = load i64, i64* %_data.addr ; <i64> [#uses=1]
- %0 = call i64 asm "bswap %eax\0A\09bswap %edx\0A\09xchgl %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %tmp) nounwind ; <i64> [#uses=1]
- store i64 %0, i64* %_data.addr
- %tmp1 = load i64, i64* %_data.addr ; <i64> [#uses=1]
- store i64 %tmp1, i64* %retval
- %1 = load i64, i64* %retval ; <i64> [#uses=1]
- ret i64 %1
+ %0 = call i64 asm "bswap %eax\0A\09bswap %edx\0A\09xchgl %eax, %%edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %_data) nounwind
+ ret i64 %0
}
+; CHECK-LABEL: __OSSwapInt64:
+; CHECK-DAG: movl 8(%esp), %edx
+; CHECK-DAG: movl 4(%esp), %eax
+; CHECK: ## InlineAsm Start
+; CHECK: ## InlineAsm End
+; Everything is set up in EAX:EDX, return immediately.
+; CHECK-NEXT: retl
+
; The tied operands are not necessarily in the same order as the defs.
; PR13742
define i64 @swapped(i64 %x, i64 %y) nounwind {
entry:
- %x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) nounwind
- %x1 = extractvalue { i64, i64 } %x0, 0
- ret i64 %x1
+ %x0 = call { i64, i64 } asm "foo", "=r,=r,1,0,~{dirflag},~{fpsr},~{flags}"(i64 %x, i64 %y) nounwind
+ %x1 = extractvalue { i64, i64 } %x0, 0
+ ret i64 %x1
}
; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm8, (%rsp)
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero
-; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp)
-; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm16, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm17, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm18, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm19, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm20, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm21, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm22, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: # implicit-def: %YMM3
; CHECK-NEXT: vmovaps %xmm1, %xmm3
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3
-; CHECK-NEXT: # implicit-def: %ZMM16
-; CHECK-NEXT: vmovaps %zmm3, %zmm16
-; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm16, %zmm16
-; CHECK-NEXT: vmovaps %zmm16, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: # implicit-def: %ZMM24
+; CHECK-NEXT: vmovaps %zmm3, %zmm24
+; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24
+; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
+; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp) # 4-byte Spill
+; CHECK-NEXT: vmovss %xmm14, (%rsp) # 4-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
define void @test_mm_setcsr(i32 %a0) nounwind {
; X32-LABEL: test_mm_setcsr:
; X32: # BB#0:
-; X32-NEXT: pushl %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %esp, %ecx
-; X32-NEXT: movl %eax, (%esp)
-; X32-NEXT: ldmxcsr (%ecx)
-; X32-NEXT: popl %eax
+; X32-NEXT: leal 4(%esp), %eax
+; X32-NEXT: ldmxcsr (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setcsr:
; CHECK: Address Line Column File ISA Discriminator Flags
; CHECK: ------------------ ------ ------ ------ --- ------------- -------------
-; CHECK: 0x0000000000000011 2 0 1 0 42 {{$}}
+; CHECK: 0x000000000000000a 2 0 1 0 42 {{$}}