static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) {
IO.enumCase(ID, "default", TargetStackID::Default);
IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
+ IO.enumCase(ID, "sve-vec", TargetStackID::SVEVector);
IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
}
};
enum Value {
Default = 0,
SGPRSpill = 1,
+ SVEVector = 2,
NoAlloc = 255
};
}
// | callee-saved fp/simd/SVE regs |
// | |
// |-----------------------------------|
+// | |
+// | SVE stack objects |
+// | |
+// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....| compile time; if present)
return DefaultSafeSPDisplacement;
}
+/// Returns the size of the entire SVE stack frame (callee-saves + spills).
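+/// Note: the result is a StackOffset expressed in units of MVT::nxv1i8
+/// (scalable bytes), so the actual allocation scales with the run-time
+/// vector length.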
+static StackOffset getSVEStackSize(const MachineFunction &MF) {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return {(int64_t)AFI->getStackSizeSVE(), MVT::nxv1i8};
+}
+
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned NumBytes = AFI->getLocalStackSize();
- return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
+ return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128 ||
+ getSVEStackSize(MF));
}
/// hasFP - Return true if the specified function should have a dedicated frame
if (canUseRedZone(MF))
return false;
+ // When there is an SVE area on the stack, always allocate the
+ // callee-saves and spills/locals separately.
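+ // (Per the frame layout comment above, the SVE area sits between the
+ // callee-saves and the locals, so a single SP bump cannot allocate both.)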
+ if (getSVEStackSize(MF))
+ return false;
+
return true;
}
// Ideally it should match SP value after prologue.
AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+ const StackOffset &SVEStackSize = getSVEStackSize(MF);
+
// getStackSize() includes all the locals in its size calculation. We don't
// include these locals when computing the stack size of a funclet, as they
// are allocated in the parent's stack frame and accessed via the frame
: (int)MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
+ assert(!SVEStackSize &&
+ "unexpected function without stack frame but with SVE objects");
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
if (!NumBytes)
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
if (CombineSPBump) {
+ assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
{-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false,
NeedsWinCFI, &HasWinCFI);
NumBytes = 0;
}
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII,
+ MachineInstr::FrameSetup);
+
// Allocate space for the rest of the frame.
if (NumBytes) {
const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
.setMIFlag(MachineInstr::FrameDestroy);
}
+ const StackOffset &SVEStackSize = getSVEStackSize(MF);
+
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
+ assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
{NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
+ // Deallocate the SVE area.
+ if (SVEStackSize)
+ if (!AFI->isStackRealigned())
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize,
+ TII, MachineInstr::FrameDestroy);
+
if (!hasFP(MF)) {
bool RedZone = canUseRedZone(MF);
// If this was a redzone leaf function, we don't need to restore the
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
+ const StackOffset &SVEStackSize = getSVEStackSize(MF);
+ if (SVEStackSize)
+ llvm_unreachable("Accessing frame indices in presence of SVE "
+ "not yet supported");
+
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
// reliable as a base). Make sure useFPForScavengingIndex() does the
<< ' ' << printReg(Reg, RegInfo);
dbgs() << "\n";);
+ bool HasSVEStackObjects = [&MFI]() {
+ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
+ if (MFI.getStackID(I) == TargetStackID::SVEVector &&
+ MFI.getObjectOffset(I) < 0)
+ return true;
+ // Note: We don't take allocatable stack objects into
+ // account yet, because allocation for those is not yet
+ // implemented.
+ return false;
+ }();
+
// If any callee-saved registers are used, the frame cannot be eliminated.
- bool CanEliminateFrame = SavedRegs.count() == 0;
+ bool CanEliminateFrame = (SavedRegs.count() == 0) && !HasSVEStackObjects;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
+ "Upwards growing stack unsupported");
+
+ // Process all fixed stack SVE objects.
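+ // Fixed-stack SVE objects have negative offsets, so the SVE area must be
+ // large enough to cover the most negative offset, rounded up to the stack
+ // alignment below.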
+ int64_t Offset = 0;
+ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
+ unsigned StackID = MFI.getStackID(I);
+ if (StackID == TargetStackID::SVEVector) {
+ int64_t FixedOffset = -MFI.getObjectOffset(I);
+ if (FixedOffset > Offset)
+ Offset = FixedOffset;
+ }
+ }
+
+ unsigned MaxAlign = getStackAlignment();
+ uint64_t SVEStackSize = alignTo(Offset, MaxAlign);
+
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ AFI->setStackSizeSVE(SVEStackSize);
+ assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
if (!MF.hasEHFunclets())
return;
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineFrameInfo &MFI = MF.getFrameInfo();
WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
MachineBasicBlock &MBB = MF.front();
int FI) const override;
int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ bool isSupportedStackID(TargetStackID::Value ID) const override {
+ switch (ID) {
+ default:
+ return false;
+ case TargetStackID::Default:
+ case TargetStackID::SVEVector:
+ case TargetStackID::NoAlloc:
+ return true;
+ }
+ }
+
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;
MaxEncoding = 0xfff;
ShiftSize = 12;
break;
+ case AArch64::ADDVL_XXI:
+ case AArch64::ADDPL_XXI:
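+ // ADDVL/ADDPL take a signed 6-bit immediate in the range [-32, 31]. The
+ // code below works with a positive offset plus a sign, hence the
+ // asymmetric bounds.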
+ MaxEncoding = 31;
+ ShiftSize = 0;
+ if (Offset < 0) {
+ MaxEncoding = 32;
+ Sign = -1;
+ Offset = -Offset;
+ }
+ break;
default:
llvm_unreachable("Unsupported opcode");
}
StackOffset Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool SetNZCV,
bool NeedsWinCFI, bool *HasWinCFI) {
- int64_t Bytes;
- Offset.getForFrameOffset(Bytes);
+ int64_t Bytes, NumPredicateVectors, NumDataVectors;
+ Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
NeedsWinCFI, HasWinCFI);
SrcReg = DestReg;
}
+
+ assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
+ "SetNZCV not supported with SVE vectors");
+ assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
+ "WinCFI not supported with SVE vectors");
+
+ if (NumDataVectors) {
+ emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
+ AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
+ SrcReg = DestReg;
+ }
+
+ if (NumPredicateVectors) {
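+ // SP must remain 16-byte aligned; a predicate-sized adjustment (multiples
+ // of 2 * vscale bytes) cannot guarantee this, so ADDPL is never applied to
+ // SP directly.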
+ assert(DestReg != AArch64::SP && "Unaligned access to SP");
+ emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
+ AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
+ }
}
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
/// returned struct in a register. This field holds the virtual register into
/// which the sret argument is passed.
unsigned SRetReturnReg = 0;
+ /// The SVE stack size (for predicates and data vectors) is maintained here
+ /// rather than in FrameInfo, as the placement and Stack IDs are
+ /// target-specific.
+ uint64_t StackSizeSVE = 0;
+
+ /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid.
+ bool HasCalculatedStackSizeSVE = false;
/// Has a value when it is known whether or not the function uses a
/// redzone, and no value otherwise.
ArgumentStackToRestore = bytes;
}
+ bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
+
+ void setStackSizeSVE(uint64_t S) {
+ HasCalculatedStackSizeSVE = true;
+ StackSizeSVE = S;
+ }
+
+ uint64_t getStackSizeSVE() const { return StackSizeSVE; }
+
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
/// vector and a 64bit GPR.
class StackOffset {
int64_t Bytes;
+ int64_t ScalableBytes;
explicit operator int() const;
public:
using Part = std::pair<int64_t, MVT>;
- StackOffset() : Bytes(0) {}
+ StackOffset() : Bytes(0), ScalableBytes(0) {}
StackOffset(int64_t Offset, MVT::SimpleValueType T) : StackOffset() {
- assert(!MVT(T).isScalableVector() && "Scalable types not supported");
+ assert(MVT(T).getSizeInBits() % 8 == 0 &&
+ "Offset type is not a multiple of bytes");
*this += Part(Offset, T);
}
- StackOffset(const StackOffset &Other) : Bytes(Other.Bytes) {}
+ StackOffset(const StackOffset &Other)
+ : Bytes(Other.Bytes), ScalableBytes(Other.ScalableBytes) {}
StackOffset &operator=(const StackOffset &) = default;
StackOffset &operator+=(const StackOffset::Part &Other) {
- assert(Other.second.getSizeInBits() % 8 == 0 &&
- "Offset type is not a multiple of bytes");
- Bytes += Other.first * (Other.second.getSizeInBits() / 8);
+ int64_t OffsetInBytes = Other.first * (Other.second.getSizeInBits() / 8);
+ if (Other.second.isScalableVector())
+ ScalableBytes += OffsetInBytes;
+ else
+ Bytes += OffsetInBytes;
return *this;
}
StackOffset &operator+=(const StackOffset &Other) {
Bytes += Other.Bytes;
+ ScalableBytes += Other.ScalableBytes;
return *this;
}
StackOffset &operator-=(const StackOffset &Other) {
Bytes -= Other.Bytes;
+ ScalableBytes -= Other.ScalableBytes;
return *this;
}
return Res;
}
+ /// Returns the scalable part of the offset in bytes.
+ int64_t getScalableBytes() const { return ScalableBytes; }
+
/// Returns the non-scalable part of the offset in bytes.
int64_t getBytes() const { return Bytes; }
/// Returns the offset in parts to which this frame offset can be
/// decomposed for the purpose of describing a frame offset.
/// For non-scalable offsets this is simply its byte size.
- void getForFrameOffset(int64_t &ByteSized) const { ByteSized = Bytes; }
+ void getForFrameOffset(int64_t &NumBytes, int64_t &NumPredicateVectors,
+ int64_t &NumDataVectors) const {
+ assert(isValid() && "Invalid frame offset");
+
+ NumBytes = Bytes;
+ NumDataVectors = 0;
+ NumPredicateVectors = ScalableBytes / 2;
+ // This method returns the offsets needed to materialize a frame adjustment.
+ // If the scalable offset is an exact number of data vectors (a multiple of 8
+ // predicate vectors), or would need more than two ADDPL instructions, the
+ // multiple-of-8 part is folded into NumDataVectors so it can be materialized
+ // with ADDVL instead, reducing the number of ADDPL instructions.
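+ // For example, a scalable offset of 130 bytes corresponds to 65 predicate
+ // vectors; since 65 > 62 would need more than two ADDPLs, it is decomposed
+ // into 8 data vectors (ADDVL) plus 1 predicate vector (ADDPL), as covered
+ // by the getForFrameOffset unit test.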
+ if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
+ NumPredicateVectors > 62) {
+ NumDataVectors = NumPredicateVectors / 8;
+ NumPredicateVectors -= NumDataVectors * 8;
+ }
+ }
/// Returns whether the offset is known zero.
- explicit operator bool() const { return Bytes; }
+ explicit operator bool() const { return Bytes || ScalableBytes; }
+
+ bool isValid() const {
+ // The smallest scalable element supported by scaled SVE addressing modes
+ // is a predicate, which is 2 scalable bytes in size. So the scalable byte
+ // offset must always be a multiple of 2.
+ return ScalableBytes % 2 == 0;
+ }
};
} // end namespace llvm
case TargetStackID::NoAlloc:
case TargetStackID::SGPRSpill:
return true;
+ case TargetStackID::SVEVector:
+ return false;
}
llvm_unreachable("Invalid TargetStackID::Value");
}
--- /dev/null
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
+#
+# Test allocation and deallocation of SVE objects on the stack,
+# as well as using a combination of scalable and non-scalable
+# offsets to access the SVE objects on the stack.
+#
+# SVE objects are allocated below the (scalar) callee saves,
+# and above spills/locals and the alignment gap, e.g.
+#
+# +-------------+
+# | stack arg |
+# +-------------+ <- SP before call
+# | Callee Saves|
+# | Frame record| (if available)
+# |-------------| <- FP (if available)
+# | SVE area |
+# +-------------+
+# |/////////////| alignment gap.
+# | : |
+# | Stack objs |
+# | : |
+# +-------------+ <- SP after call and frame-setup
+#
+--- |
+
+ define void @test_allocate_sve() nounwind { entry: unreachable }
+ define void @test_allocate_sve_gpr_callee_saves() nounwind { entry: unreachable }
+ define void @test_allocate_sve_gpr_realigned() nounwind { entry: unreachable }
+
+...
+# +----------+
+# | %fixed- | // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
+# | stack.0 | // to be materialized with 2*ADDVL (<=> 2 * n * 16 bytes)
+# +----------+
+# | %stack.0 | // not scalable
+# +----------+ <- SP
+
+# CHECK-LABEL: name: test_allocate_sve
+# CHECK: stackSize: 16
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: RET_ReallyLR
+name: test_allocate_sve
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 18, alignment: 2, offset: -18 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 8 }
+body: |
+ bb.0.entry:
+ RET_ReallyLR
+---
+...
+# +----------+
+# | x20, x21 | // callee saves
+# +----------+
+# | %fixed- | // scalable objects
+# | stack.0 |
+# +----------+
+# | %stack.0 | // not scalable
+# +----------+ <- SP
+
+# CHECK-LABEL: name: test_allocate_sve_gpr_callee_saves
+# CHECK: stackSize: 32
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STPXpre killed $x21, killed $x20, $sp, -2
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $x20 = IMPLICIT_DEF
+# CHECK-NEXT: $x21 = IMPLICIT_DEF
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: $sp, $x21, $x20 = frame-destroy LDPXpost $sp, 2
+# CHECK-NEXT: RET_ReallyLR
+name: test_allocate_sve_gpr_callee_saves
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 18, alignment: 2, offset: -18 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 8 }
+body: |
+ bb.0.entry:
+ $x20 = IMPLICIT_DEF
+ $x21 = IMPLICIT_DEF
+ RET_ReallyLR
+---
+...
+# +----------+
+# | lr, fp | // frame record
+# +----------+ <- FP
+# | %fixed- | // scalable objects
+# | stack.0 |
+# +----------+
+# |//////////| // alignment gap
+# | %stack.0 | // not scalable
+# +----------+ <- SP
+# CHECK-LABEL: name: test_allocate_sve_gpr_realigned
+# CHECK: stackSize: 32
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2
+# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
+# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
+# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
+# CHECK-NEXT: RET_ReallyLR
+name: test_allocate_sve_gpr_realigned
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 18, alignment: 2, offset: -18 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 32 }
+body: |
+ bb.0.entry:
+ RET_ReallyLR
+---
StackOffset C(2, MVT::v4i64);
EXPECT_EQ(64, C.getBytes());
+
+ StackOffset D(2, MVT::nxv4i64);
+ EXPECT_EQ(64, D.getScalableBytes());
+
+ StackOffset E(2, MVT::v4i64);
+ EXPECT_EQ(0, E.getScalableBytes());
+
+ StackOffset F(2, MVT::nxv4i64);
+ EXPECT_EQ(0, F.getBytes());
}
TEST(StackOffset, Add) {
StackOffset D(1, MVT::i32);
D += A;
EXPECT_EQ(12, D.getBytes());
+
+ StackOffset E(1, MVT::nxv1i32);
+ StackOffset F = C + E;
+ EXPECT_EQ(12, F.getBytes());
+ EXPECT_EQ(4, F.getScalableBytes());
}
TEST(StackOffset, Sub) {
StackOffset D(1, MVT::i64);
D -= A;
EXPECT_EQ(0, D.getBytes());
+
+ C += StackOffset(2, MVT::nxv1i32);
+ StackOffset E = StackOffset(1, MVT::nxv1i32);
+ StackOffset F = C - E;
+ EXPECT_EQ(4, F.getBytes());
+ EXPECT_EQ(4, F.getScalableBytes());
}
TEST(StackOffset, isZero) {
StackOffset B(0, MVT::i32);
EXPECT_TRUE(!A);
EXPECT_TRUE(!(A + B));
+
+ StackOffset C(0, MVT::nxv1i32);
+ EXPECT_TRUE(!(A + C));
+
+ StackOffset D(1, MVT::nxv1i32);
+ EXPECT_FALSE(!(A + D));
+}
+
+TEST(StackOffset, isValid) {
+ EXPECT_FALSE(StackOffset(1, MVT::nxv8i1).isValid());
+ EXPECT_TRUE(StackOffset(2, MVT::nxv8i1).isValid());
+
+#ifndef NDEBUG
+#ifdef GTEST_HAS_DEATH_TEST
+ EXPECT_DEATH(StackOffset(1, MVT::i1),
+ "Offset type is not a multiple of bytes");
+ EXPECT_DEATH(StackOffset(1, MVT::nxv1i1),
+ "Offset type is not a multiple of bytes");
+#endif // defined GTEST_HAS_DEATH_TEST
+#endif // not defined NDEBUG
}
TEST(StackOffset, getForFrameOffset) {
StackOffset A(1, MVT::i64);
StackOffset B(1, MVT::i32);
- int64_t ByteSized;
- (A + B).getForFrameOffset(ByteSized);
+ StackOffset C(1, MVT::nxv4i32);
+
+ // If all offsets can be materialized with only ADDVL,
+ // make sure PLSized is 0.
+ int64_t ByteSized, VLSized, PLSized;
+ (A + B + C).getForFrameOffset(ByteSized, PLSized, VLSized);
EXPECT_EQ(12, ByteSized);
+ EXPECT_EQ(1, VLSized);
+ EXPECT_EQ(0, PLSized);
+
+ // If an ADDPL is needed and the offset can be materialized with at most two
+ // ADDPL instructions, keep all of the scalable bytes in PLSized.
+ StackOffset D(1, MVT::nxv16i1);
+ (C + D).getForFrameOffset(ByteSized, PLSized, VLSized);
+ EXPECT_EQ(0, ByteSized);
+ EXPECT_EQ(0, VLSized);
+ EXPECT_EQ(9, PLSized);
+
+ StackOffset E(4, MVT::nxv4i32);
+ StackOffset F(1, MVT::nxv16i1);
+ (E + F).getForFrameOffset(ByteSized, PLSized, VLSized);
+ EXPECT_EQ(0, ByteSized);
+ EXPECT_EQ(0, VLSized);
+ EXPECT_EQ(33, PLSized);
+
+ // If the offset requires an ADDPL instruction to materialize, and would
+ // require more than two instructions, decompose it into both
+ // ADDVL (n x 16 bytes) and ADDPL (n x 2 bytes) instructions.
+ StackOffset G(8, MVT::nxv4i32);
+ StackOffset H(1, MVT::nxv16i1);
+ (G + H).getForFrameOffset(ByteSized, PLSized, VLSized);
+ EXPECT_EQ(0, ByteSized);
+ EXPECT_EQ(8, VLSized);
+ EXPECT_EQ(1, PLSized);
}