From: Hal Finkel
Date: Wed, 25 Feb 2015 21:36:59 +0000 (+0000)
Subject: [PowerPC] Make LDtocL and friends invariant loads
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7840990de8f102bf83ee297e74f225b109228512;p=llvm

[PowerPC] Make LDtocL and friends invariant loads

LDtocL, and other loads that roughly correspond to the TOC_ENTRY SDAG node, represent loads from the TOC, which is invariant. As a result, these loads can be hoisted out of loops, etc. In order to do this, we need to generate GOT-style MMOs for TOC_ENTRY, which requires treating it as a legitimate memory intrinsic node type. Once this is done, the MMO transfer is automatically handled for TableGen-driven instruction selection, and for nodes generated directly in PPCISelDAGToDAG, we need to transfer the MMOs manually.

Also, we were not transferring MMOs associated with pre-increment loads, so do that too.

Lastly, this fixes an exposed bug where R30 was not added as a defined operand of UpdateGBR.

This problem was highlighted by an example (used to generate the test case) posted to llvmdev by Francois Pichet.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@230553 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 0d553d32f31..b10e85437ba 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -223,6 +223,8 @@ private: bool AllUsersSelectZero(SDNode *N); void SwapAllSelectUsers(SDNode *N); + + SDNode *transferMemOperands(SDNode *N, SDNode *Result); }; } @@ -315,7 +317,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass); BuildMI(FirstMBB, MBBI, dl, - TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg) + TII.get(PPC::UpdateGBR), GlobalBaseReg) .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg); MF->getInfo()->setUsesPICBase(true); } @@ -2342,6 +2344,14 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1)); } +SDNode *PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(Result)->setMemRefs(MemOp, MemOp + 1); + return Result; +} + // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. 
@@ -2460,9 +2470,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; - return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), - PPCLowering->getPointerTy(), - MVT::Other, Ops); + return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + PPCLowering->getPointerTy(), + MVT::Other, Ops)); } else { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; @@ -2497,9 +2508,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; - return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), - PPCLowering->getPointerTy(), - MVT::Other, Ops); + return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl, + LD->getValueType(0), + PPCLowering->getPointerTy(), + MVT::Other, Ops)); } } @@ -2851,8 +2863,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { "Only supported for 64-bit ABI and 32-bit SVR4"); if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { SDValue GA = N->getOperand(0); - return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, - N->getOperand(1)); + return transferMemOperands(N, CurDAG->getMachineNode(PPC::LWZtoc, dl, + MVT::i32, GA, N->getOperand(1))); } // For medium and large code model, we generate two instructions as @@ -2872,12 +2884,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue GA = N->getOperand(0); SDValue TOCbase = N->getOperand(1); SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, - TOCbase, GA); + TOCbase, GA); if (isa(GA) || isa(GA) || CModel == CodeModel::Large) - return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, - SDValue(Tmp, 0)); + return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, + MVT::i64, GA, SDValue(Tmp, 0))); if (GlobalAddressSDNode *G = dyn_cast(GA)) { const GlobalValue *GValue = G->getGlobal(); @@ -2885,8 
+2897,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { (GValue->isDeclaration() || GValue->isWeakForLinker())) || GValue->isDeclaration() || GValue->hasCommonLinkage() || GValue->hasAvailableExternallyLinkage()) - return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, - SDValue(Tmp, 0)); + return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, + MVT::i64, GA, SDValue(Tmp, 0))); } return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f59cad50152..ae80fa3d761 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1821,6 +1821,19 @@ static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } +static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit, + SDValue GA) { + EVT VT = Is64Bit ? MVT::i64 : MVT::i32; + SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : + DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); + + SDValue Ops[] = { GA, Reg }; + return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl, + DAG.getVTList(VT, MVT::Other), Ops, VT, + MachinePointerInfo::getGOT(), 0, false, true, + false, 0); +} + SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); @@ -1832,8 +1845,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); - return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA, - DAG.getRegister(PPC::X2, MVT::i64)); + return getTOCEntry(DAG, SDLoc(CP), true, GA); } unsigned MOHiFlag, MOLoFlag; @@ -1843,9 +1855,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, if (isPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), PPCII::MO_PIC_FLAG); - SDLoc DL(CP); - return 
DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, - DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); + return getTOCEntry(DAG, SDLoc(CP), false, GA); } SDValue CPIHi = @@ -1864,8 +1874,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); - return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA, - DAG.getRegister(PPC::X2, MVT::i64)); + return getTOCEntry(DAG, SDLoc(JT), true, GA); } unsigned MOHiFlag, MOLoFlag; @@ -1875,9 +1884,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { if (isPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); - SDLoc DL(GA); - return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA, - DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT)); + return getTOCEntry(DAG, SDLoc(GA), false, GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); @@ -1896,8 +1903,7 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); - return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA, - DAG.getRegister(PPC::X2, MVT::i64)); + return getTOCEntry(DAG, SDLoc(BASDN), true, GA); } unsigned MOHiFlag, MOLoFlag; @@ -2007,8 +2013,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); - return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA, - DAG.getRegister(PPC::X2, MVT::i64)); + return getTOCEntry(DAG, DL, true, GA); } unsigned MOHiFlag, MOLoFlag; @@ -2019,8 +2024,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SDValue GA = DAG.getTargetGlobalAddress(GV, 
DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); - return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA, - DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32)); + return getTOCEntry(DAG, DL, false, GA); } SDValue GAHi = diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index a2a824106b1..faa1e3f06ff 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -71,8 +71,6 @@ namespace llvm { /// though these are usually folded into other nodes. Hi, Lo, - TOC_ENTRY, - /// The following two target-specific nodes are used for calls through /// function pointers in the 64-bit SVR4 ABI. @@ -337,7 +335,12 @@ namespace llvm { /// QBRC, CHAIN = QVLFSb CHAIN, Ptr /// The 4xf32 load used for v4i1 constants. - QVLFSb + QVLFSb, + + /// GPRC = TOC_ENTRY GA, TOC + /// Loads the entry for GA from the TOC, where the TOC base is given by + /// the last operand. + TOC_ENTRY }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index cee58a6bfab..1a045b1393d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -119,7 +119,8 @@ def PPCfsel : SDNode<"PPCISD::FSEL", def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; -def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>; +def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, + [SDNPMayLoad, SDNPMemOperand]>; def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; diff --git a/test/CodeGen/PowerPC/ldtoc-inv.ll b/test/CodeGen/PowerPC/ldtoc-inv.ll new file mode 100644 index 00000000000..550747c4695 --- /dev/null +++ b/test/CodeGen/PowerPC/ldtoc-inv.ll @@ -0,0 +1,39 @@ +; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@phasor = external constant [4096 x i32] + +; 
Function Attrs: nounwind +define void @test(i32* nocapture %out, i32 zeroext %step_size) #0 { +entry: + %shl = shl i32 %step_size, 2 + %idxprom = zext i32 %shl to i64 + br label %for.body + +; Make sure that the TOC load has been hoisted out of the loop. +; CHECK-LABEL: @test +; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc@l +; CHECK: %for.body +; CHECK: blr + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = trunc i64 %indvars.iv to i32 + %shl1 = shl i32 %0, %step_size + %idxprom2 = sext i32 %shl1 to i64 + %arrayidx.sum = add nsw i64 %idxprom2, %idxprom + %arrayidx3 = getelementptr inbounds [4096 x i32]* @phasor, i64 0, i64 %arrayidx.sum + %1 = load i32* %arrayidx3, align 4 + %arrayidx5 = getelementptr inbounds i32* %out, i64 %indvars.iv + store i32 %1, i32* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4 + %cmp = icmp slt i64 %indvars.iv.next, 1020 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +attributes #0 = { nounwind } + diff --git a/test/CodeGen/PowerPC/ppc64le-aggregates.ll b/test/CodeGen/PowerPC/ppc64le-aggregates.ll index 4fe6f8db33f..3fce36ec23b 100644 --- a/test/CodeGen/PowerPC/ppc64le-aggregates.ll +++ b/test/CodeGen/PowerPC/ppc64le-aggregates.ll @@ -264,26 +264,26 @@ entry: ret void } ; CHECK-LABEL: @caller2 -; CHECK: ld [[REG:[0-9]+]], .LC -; CHECK-DAG: lfs 1, 0([[REG]]) -; CHECK-DAG: lfs 2, 4([[REG]]) -; CHECK-DAG: lfs 3, 8([[REG]]) -; CHECK-DAG: lfs 4, 12([[REG]]) -; CHECK-DAG: lfs 5, 16([[REG]]) -; CHECK-DAG: lfs 6, 20([[REG]]) -; CHECK-DAG: lfs 7, 24([[REG]]) -; CHECK-DAG: lfs 8, 28([[REG]]) -; CHECK: ld [[REG:[0-9]+]], .LC -; CHECK-DAG: lfs 9, 0([[REG]]) -; CHECK-DAG: lfs 10, 4([[REG]]) -; CHECK-DAG: lfs 11, 8([[REG]]) -; CHECK-DAG: lfs 12, 12([[REG]]) -; CHECK-DAG: lfs 13, 16([[REG]]) -; CHECK: ld [[REG:[0-9]+]], .LC -; CHECK-DAG: lwz [[REG0:[0-9]+]], 0([[REG]]) -; CHECK-DAG: lwz [[REG1:[0-9]+]], 4([[REG]]) -; 
CHECK-DAG: sldi [[REG1]], [[REG1]], 32 -; CHECK-DAG: or 10, [[REG0]], [[REG1]] +; CHECK: ld {{[0-9]+}}, .LC +; CHECK-DAG: lfs 1, 0({{[0-9]+}}) +; CHECK-DAG: lfs 2, 4({{[0-9]+}}) +; CHECK-DAG: lfs 3, 8({{[0-9]+}}) +; CHECK-DAG: lfs 4, 12({{[0-9]+}}) +; CHECK-DAG: lfs 5, 16({{[0-9]+}}) +; CHECK-DAG: lfs 6, 20({{[0-9]+}}) +; CHECK-DAG: lfs 7, 24({{[0-9]+}}) +; CHECK-DAG: lfs 8, 28({{[0-9]+}}) + +; CHECK-DAG: lfs 9, 0({{[0-9]+}}) +; CHECK-DAG: lfs 10, 4({{[0-9]+}}) +; CHECK-DAG: lfs 11, 8({{[0-9]+}}) +; CHECK-DAG: lfs 12, 12({{[0-9]+}}) +; CHECK-DAG: lfs 13, 16({{[0-9]+}}) + +; CHECK-DAG: lwz [[REG0:[0-9]+]], 0({{[0-9]+}}) +; CHECK-DAG: lwz [[REG1:[0-9]+]], 4({{[0-9]+}}) +; CHECK-DAG: sldi [[REG2:[0-9]+]], [[REG1]], 32 +; CHECK-DAG: or 10, [[REG0]], [[REG2]] ; CHECK: bl test2 declare void @test2([8 x float], [5 x float], [2 x float]) diff --git a/test/CodeGen/PowerPC/tls-store2.ll b/test/CodeGen/PowerPC/tls-store2.ll index a9c97b5e23e..e9aa17e8c0f 100644 --- a/test/CodeGen/PowerPC/tls-store2.ll +++ b/test/CodeGen/PowerPC/tls-store2.ll @@ -22,7 +22,10 @@ entry: ; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_callable@tlsgd) ; CHECK-NEXT: nop -; CHECK: std {{[0-9]+}}, 0(3) +; FIXME: We could check here for 'std {{[0-9]+}}, 0(3)', but that no longer +; works because, with new scheduling freedom, we create a copy of R3 based on the +; initial scheduling, but don't coalesce it again after we move the instructions +; so that the copy is no longer necessary. 
; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l ; CHECK: bl __tls_get_addr(__once_call@tlsgd) ; CHECK-NEXT: nop diff --git a/test/CodeGen/PowerPC/vec-abi-align.ll b/test/CodeGen/PowerPC/vec-abi-align.ll index 5075ff2b8c0..2ec57af3513 100644 --- a/test/CodeGen/PowerPC/vec-abi-align.ll +++ b/test/CodeGen/PowerPC/vec-abi-align.ll @@ -35,17 +35,17 @@ entry: ret void ; CHECK-LABEL: @test2 -; CHECK: ld {{[0-9]+}}, 112(1) -; CHECK: li [[REG16:[0-9]+]], 16 -; CHECK: addi [[REGB:[0-9]+]], 1, 112 -; CHECK: lvx 2, [[REGB]], [[REG16]] +; CHECK-DAG: ld {{[0-9]+}}, 112(1) +; CHECK-DAG: li [[REG16:[0-9]+]], 16 +; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 112 +; CHECK-DAG: lvx 2, [[REGB]], [[REG16]] ; CHECK: blr ; CHECK-VSX-LABEL: @test2 -; CHECK-VSX: ld {{[0-9]+}}, 112(1) -; CHECK-VSX: li [[REG16:[0-9]+]], 16 -; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 112 -; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]] +; CHECK-VSX-DAG: ld {{[0-9]+}}, 112(1) +; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16 +; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 112 +; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]] ; CHECK-VSX: blr } @@ -61,17 +61,17 @@ entry: ret void ; CHECK-LABEL: @test3 -; CHECK: ld {{[0-9]+}}, 128(1) -; CHECK: li [[REG16:[0-9]+]], 16 -; CHECK: addi [[REGB:[0-9]+]], 1, 128 -; CHECK: lvx 2, [[REGB]], [[REG16]] +; CHECK-DAG: ld {{[0-9]+}}, 128(1) +; CHECK-DAG: li [[REG16:[0-9]+]], 16 +; CHECK-DAG: addi [[REGB:[0-9]+]], 1, 128 +; CHECK-DAG: lvx 2, [[REGB]], [[REG16]] ; CHECK: blr ; CHECK-VSX-LABEL: @test3 -; CHECK-VSX: ld {{[0-9]+}}, 128(1) -; CHECK-VSX: li [[REG16:[0-9]+]], 16 -; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 128 -; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]] +; CHECK-VSX-DAG: ld {{[0-9]+}}, 128(1) +; CHECK-VSX-DAG: li [[REG16:[0-9]+]], 16 +; CHECK-VSX-DAG: addi [[REGB:[0-9]+]], 1, 128 +; CHECK-VSX-DAG: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]] ; CHECK-VSX: blr }