From 55e2d2fb653e2c298fe9832e1e6963c328ff4c7b Mon Sep 17 00:00:00 2001
From: Tim Northover <tnorthover@apple.com>
Date: Mon, 21 Aug 2017 21:56:11 +0000
Subject: [PATCH] GlobalISel (AArch64): fix ABI at border between GPRs and SP.

If a struct would end up half in GPRs and half on SP the ABI says it
should actually go entirely on the stack. We were getting this wrong in
GlobalISel before, causing compatibility issues.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@311388 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64CallLowering.cpp    | 17 ++++-
 lib/Target/AArch64/AArch64CallLowering.h      |  1 +
 lib/Target/AArch64/AArch64ISelLowering.h      |  7 +--
 .../AArch64/GlobalISel/call-translator-ios.ll | 59 +++++++++++++++++++
 .../AArch64/GlobalISel/call-translator.ll     | 59 +++++++++++++++++++
 5 files changed, 133 insertions(+), 10 deletions(-)

diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp
index a1cc0b53a43..13769a22800 100644
--- a/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -172,7 +172,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
 
 void AArch64CallLowering::splitToValueTypes(
     const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
-    const DataLayout &DL, MachineRegisterInfo &MRI,
+    const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
     const SplitArgTy &PerformArgSplit) const {
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
   LLVMContext &Ctx = OrigArg.Ty->getContext();
@@ -190,14 +190,19 @@ void AArch64CallLowering::splitToValueTypes(
   }
 
   unsigned FirstRegIdx = SplitArgs.size();
+  bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+      OrigArg.Ty, CallConv, false);
   for (auto SplitVT : SplitVTs) {
-    // FIXME: set split flags if they're actually used (e.g. i128 on AAPCS).
     Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
     SplitArgs.push_back(
         ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
                 SplitTy, OrigArg.Flags, OrigArg.IsFixed});
+    if (NeedsRegBlock)
+      SplitArgs.back().Flags.setInConsecutiveRegs();
   }
 
+  SplitArgs.back().Flags.setInConsecutiveRegsLast();
+
   for (unsigned i = 0; i < Offsets.size(); ++i)
     PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8);
 }
@@ -220,7 +225,7 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
     setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
 
     SmallVector<ArgInfo, 8> SplitArgs;
-    splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
                       [&](unsigned Reg, uint64_t Offset) {
                         MIRBuilder.buildExtract(Reg, VReg, Offset);
                       });
@@ -250,7 +255,7 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
     LLT Ty = MRI.getType(VRegs[i]);
     unsigned Dst = VRegs[i];
 
-    splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
                       [&](unsigned Reg, uint64_t Offset) {
                         if (!Split) {
                           Split = true;
@@ -311,7 +316,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   SmallVector<ArgInfo, 8> SplitArgs;
   for (auto &OrigArg : OrigArgs) {
-    splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CallConv,
                       [&](unsigned Reg, uint64_t Offset) {
                         MIRBuilder.buildExtract(Reg, OrigArg.Reg, Offset);
                       });
@@ -364,7 +369,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
     SmallVector<uint64_t, 8> RegOffsets;
     SmallVector<unsigned, 8> SplitRegs;
-    splitToValueTypes(OrigRet, SplitArgs, DL, MRI,
+    splitToValueTypes(OrigRet, SplitArgs, DL, MRI, F.getCallingConv(),
                       [&](unsigned Reg, uint64_t Offset) {
                         RegOffsets.push_back(Offset);
                         SplitRegs.push_back(Reg);
diff --git a/lib/Target/AArch64/AArch64CallLowering.h b/lib/Target/AArch64/AArch64CallLowering.h
index ad01d0539b0..68c127fc42e 100644
--- a/lib/Target/AArch64/AArch64CallLowering.h
+++ b/lib/Target/AArch64/AArch64CallLowering.h
@@ -56,6 +56,7 @@ private:
   void splitToValueTypes(const ArgInfo &OrigArgInfo,
                          SmallVectorImpl<ArgInfo> &SplitArgs,
                          const DataLayout &DL, MachineRegisterInfo &MRI,
+                         CallingConv::ID CallConv,
                          const SplitArgTy &SplitArg) const;
 };
 
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 1b36a56aa16..f4e08ad165e 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -471,6 +471,9 @@ public:
 
   MachineMemOperand::Flags getMMOFlags(const Instruction &I) const override;
 
+  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
+                                                 CallingConv::ID CallConv,
+                                                 bool isVarArg) const override;
 private:
   bool isExtFreeImpl(const Instruction *Ext) const override;
 
@@ -640,10 +643,6 @@ private:
   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                           SelectionDAG &DAG) const override;
 
-  bool functionArgumentNeedsConsecutiveRegisters(Type *Ty,
-                                                 CallingConv::ID CallConv,
-                                                 bool isVarArg) const override;
-
   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
 };
 
diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
index 38a90bbfbbd..cdcdb964462 100644
--- a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
+++ b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll
@@ -33,3 +33,62 @@ define void @test_call_stack() {
   call signext i8 @test_stack_slots([8 x i64] undef, i8 signext 42, i8 signext 12)
   ret void
 }
+
+; CHECK-LABEL: name: test_128bit_struct
+; CHECK: %x0 = COPY
+; CHECK: %x1 = COPY
+; CHECK: %x2 = COPY
+; CHECK: BL @take_128bit_struct
+define void @test_128bit_struct([2 x i64]* %ptr) {
+  %struct = load [2 x i64], [2 x i64]* %ptr
+  call void @take_128bit_struct([2 x i64]* null, [2 x i64] %struct)
+  ret void
+}
+
+; CHECK-LABEL: name: take_128bit_struct
+; CHECK: {{%.*}}(p0) = COPY %x0
+; CHECK: {{%.*}}(s64) = COPY %x1
+; CHECK: {{%.*}}(s64) = COPY %x2
+define void @take_128bit_struct([2 x i64]* %ptr, [2 x i64] %in) {
+  store [2 x i64] %in, [2 x i64]* %ptr
+  ret void
+}
+
+; CHECK-LABEL: name: test_split_struct
+; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD {{.*}}(p0)
+; CHECK: [[LO:%[0-9]+]](s64) = G_EXTRACT [[STRUCT]](s128), 0
+; CHECK: [[HI:%[0-9]+]](s64) = G_EXTRACT [[STRUCT]](s128), 64
+
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF:%[0-9]+]](s64) = G_CONSTANT i64 0
+; CHECK: [[ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF]]
+; CHECK: G_STORE [[LO]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0)
+
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF:%[0-9]+]](s64) = G_CONSTANT i64 8
+; CHECK: [[ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF]]
+; CHECK: G_STORE [[HI]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0)
+define void @test_split_struct([2 x i64]* %ptr) {
+  %struct = load [2 x i64], [2 x i64]* %ptr
+  call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3,
+                               i64 4, i64 5, i64 6,
+                               [2 x i64] %struct)
+  ret void
+}
+
+; CHECK-LABEL: name: take_split_struct
+; CHECK: fixedStack:
+; CHECK-DAG:  - { id: [[LO_FRAME:[0-9]+]], type: default, offset: 0, size: 8
+; CHECK-DAG:  - { id: [[HI_FRAME:[0-9]+]], type: default, offset: 8, size: 8
+
+; CHECK: [[LOPTR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[LO_FRAME]]
+; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD [[LOPTR]](p0) :: (invariant load 8 from %fixed-stack.[[LO_FRAME]], align 0)
+
+; CHECK: [[HIPTR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[HI_FRAME]]
+; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD [[HIPTR]](p0) :: (invariant load 8 from %fixed-stack.[[HI_FRAME]], align 0)
+define void @take_split_struct([2 x i64]* %ptr, i64, i64, i64,
+                               i64, i64, i64,
+                               [2 x i64] %in) {
+  store [2 x i64] %in, [2 x i64]* %ptr
+  ret void
+}
diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/test/CodeGen/AArch64/GlobalISel/call-translator.ll
index 1cc196be359..004e3fd2a1d 100644
--- a/test/CodeGen/AArch64/GlobalISel/call-translator.ll
+++ b/test/CodeGen/AArch64/GlobalISel/call-translator.ll
@@ -215,3 +215,62 @@ define void @test_call_stack() {
 define void @test_mem_i1([8 x i64], i1 %in) {
   ret void
 }
+
+; CHECK-LABEL: name: test_128bit_struct
+; CHECK: %x0 = COPY
+; CHECK: %x1 = COPY
+; CHECK: %x2 = COPY
+; CHECK: BL @take_128bit_struct
+define void @test_128bit_struct([2 x i64]* %ptr) {
+  %struct = load [2 x i64], [2 x i64]* %ptr
+  call void @take_128bit_struct([2 x i64]* null, [2 x i64] %struct)
+  ret void
+}
+
+; CHECK-LABEL: name: take_128bit_struct
+; CHECK: {{%.*}}(p0) = COPY %x0
+; CHECK: {{%.*}}(s64) = COPY %x1
+; CHECK: {{%.*}}(s64) = COPY %x2
+define void @take_128bit_struct([2 x i64]* %ptr, [2 x i64] %in) {
+  store [2 x i64] %in, [2 x i64]* %ptr
+  ret void
+}
+
+; CHECK-LABEL: name: test_split_struct
+; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD {{.*}}(p0)
+; CHECK: [[LO:%[0-9]+]](s64) = G_EXTRACT [[STRUCT]](s128), 0
+; CHECK: [[HI:%[0-9]+]](s64) = G_EXTRACT [[STRUCT]](s128), 64
+
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF:%[0-9]+]](s64) = G_CONSTANT i64 0
+; CHECK: [[ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF]]
+; CHECK: G_STORE [[LO]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0)
+
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[OFF:%[0-9]+]](s64) = G_CONSTANT i64 8
+; CHECK: [[ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF]]
+; CHECK: G_STORE [[HI]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0)
+define void @test_split_struct([2 x i64]* %ptr) {
+  %struct = load [2 x i64], [2 x i64]* %ptr
+  call void @take_split_struct([2 x i64]* null, i64 1, i64 2, i64 3,
+                               i64 4, i64 5, i64 6,
+                               [2 x i64] %struct)
+  ret void
+}
+
+; CHECK-LABEL: name: take_split_struct
+; CHECK: fixedStack:
+; CHECK-DAG:  - { id: [[LO_FRAME:[0-9]+]], type: default, offset: 0, size: 8
+; CHECK-DAG:  - { id: [[HI_FRAME:[0-9]+]], type: default, offset: 8, size: 8
+
+; CHECK: [[LOPTR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[LO_FRAME]]
+; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD [[LOPTR]](p0) :: (invariant load 8 from %fixed-stack.[[LO_FRAME]], align 0)
+
+; CHECK: [[HIPTR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[HI_FRAME]]
+; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD [[HIPTR]](p0) :: (invariant load 8 from %fixed-stack.[[HI_FRAME]], align 0)
+define void @take_split_struct([2 x i64]* %ptr, i64, i64, i64,
+                               i64, i64, i64,
+                               [2 x i64] %in) {
+  store [2 x i64] %in, [2 x i64]* %ptr
+  ret void
+}
-- 
2.50.1