From 69359ed45b57ad4b2add357d2a11fbb91c0fde08 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sat, 26 Jul 2014 05:49:40 +0000 Subject: [PATCH] [SDAG] When performing post-legalize DAG combining, run the legalizer over each node in the worklist prior to combining. This allows the combiner to produce new nodes which need to go back through legalization. This is particularly useful when generating operands to target specific nodes in a post-legalize DAG combine where the operands are significantly easier to express as pre-legalized operations. My immediate use case will be PSHUFB formation where we need to build a constant shuffle mask with a build_vector node. This also refactors the relevant functionality in the legalizer to support this, and updates relevant tests. I've spoken to the R600 folks and these changes look like improvements to them. The avx512 change needs to be investigated, I suspect there is a disagreement between the legalizer and the DAG combiner there, but it seems a minor issue so leaving it to be re-evaluated after this patch. Differential Revision: http://reviews.llvm.org/D4564 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214020 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/SelectionDAG.h | 22 ++++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 ++- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 150 ++++++++++++++--------- test/CodeGen/R600/fp_to_sint.ll | 28 ++--- test/CodeGen/R600/fp_to_uint.ll | 28 ++--- test/CodeGen/R600/setcc-equivalent.ll | 3 +- test/CodeGen/X86/avx512-cmp.ll | 5 +- 7 files changed, 160 insertions(+), 94 deletions(-) diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index b0e92077747..67cc4341905 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -16,6 +16,7 @@ #define LLVM_CODEGEN_SELECTIONDAG_H #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/ilist.h" #include "llvm/CodeGen/DAGCombine.h" @@ -364,6 +365,27 @@ public: /// the graph. void Legalize(); + /// \brief Transforms a SelectionDAG node and any operands to it into a node + /// that is compatible with the target instruction selector, as indicated by + /// the TargetLowering object. + /// + /// \returns true if \c N is a valid, legal node after calling this. + /// + /// This essentially runs a single recursive walk of the \c Legalize process + /// over the given node (and its operands). This can be used to incrementally + /// legalize the DAG. All of the nodes which are directly replaced, + /// potentially including N, are added to the output parameter \c + /// UpdatedNodes so that the delta to the DAG can be understood by the + /// caller. + /// + /// When this returns false, N has been legalized in a way that make the + /// pointer passed in no longer valid. It may have even been deleted from the + /// DAG, and so it shouldn't be used further. When this returns true, the + /// N passed in is a legal node, and can be immediately processed as such. + /// This may still have done some work on the DAG, and will still populate + /// UpdatedNodes with any new nodes replacing those originally in the DAG. + bool LegalizeOp(SDNode *N, SmallSetVector &UpdatedNodes); + /// LegalizeVectors - This transforms the SelectionDAG into a SelectionDAG /// that only uses vector math operations supported by the target. This is /// necessary as a separate step from Legalize because unrolling a vector diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 275790f37c0..8b1854e025c 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1136,10 +1136,6 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // changes of the root. HandleSDNode Dummy(DAG.getRoot()); - // The root of the dag may dangle to deleted nodes until the dag combiner is - // done. Set it to null to avoid confusion. - DAG.setRoot(SDValue()); - // while the worklist isn't empty, find a node and // try and combine it. while (!WorklistMap.empty()) { @@ -1173,6 +1169,20 @@ void DAGCombiner::Run(CombineLevel AtLevel) { WorklistRemover DeadNodes(*this); + // If this combine is running after legalizing the DAG, re-legalize any + // nodes pulled off the worklist. + if (Level == AfterLegalizeDAG) { + SmallSetVector UpdatedNodes; + bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); + + for (SDNode *LN : UpdatedNodes) { + AddToWorklist(LN); + AddUsersToWorklist(LN); + } + if (!NIsValid) + continue; + } + SDValue RV = combine(N); if (!RV.getNode()) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 1b2f36c6e41..a5ff5f9f299 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -53,11 +54,16 @@ class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { const TargetLowering &TLI; SelectionDAG &DAG; - /// LegalizePosition - The iterator for walking through the node list. - SelectionDAG::allnodes_iterator LegalizePosition; + /// \brief The iterator being used to walk the DAG. We hold a reference to it + /// in order to update it as necessary on node deletion. + SelectionDAG::allnodes_iterator &LegalizePosition; - /// LegalizedNodes - The set of nodes which have already been legalized. - SmallPtrSet LegalizedNodes; + /// \brief The set of nodes which have already been legalized. We hold a + /// reference to it in order to update as necessary on node deletion. + SmallPtrSetImpl &LegalizedNodes; + + /// \brief A set of all the nodes updated during legalization. + SmallSetVector *UpdatedNodes; EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(*DAG.getContext(), VT); @@ -66,14 +72,19 @@ class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { // Libcall insertion helpers. public: - explicit SelectionDAGLegalize(SelectionDAG &DAG); - - void LegalizeDAG(); - -private: - /// LegalizeOp - Legalizes the given operation. + SelectionDAGLegalize(SelectionDAG &DAG, + SelectionDAG::allnodes_iterator &LegalizePosition, + SmallPtrSetImpl &LegalizedNodes, + SmallSetVector *UpdatedNodes = nullptr) + : SelectionDAG::DAGUpdateListener(DAG), TM(DAG.getTarget()), + TLI(DAG.getTargetLoweringInfo()), DAG(DAG), + LegalizePosition(LegalizePosition), LegalizedNodes(LegalizedNodes), + UpdatedNodes(UpdatedNodes) {} + + /// \brief Legalizes the given operation. void LegalizeOp(SDNode *Node); +private: SDValue OptimizeFloatStore(StoreSDNode *ST); void LegalizeLoadOps(SDNode *Node); @@ -149,6 +160,8 @@ private: LegalizedNodes.erase(N); if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) ++LegalizePosition; + if (UpdatedNodes) + UpdatedNodes->remove(N); } public: @@ -168,14 +181,25 @@ public: } void ReplaceNode(SDNode *Old, SDNode *New) { DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i)); + if (UpdatedNodes) + UpdatedNodes->insert(New); ReplacedNode(Old); } void ReplaceNode(SDValue Old, SDValue New) { DAG.ReplaceAllUsesWith(Old, New); + DAG.TransferDbgValues(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); ReplacedNode(Old.getNode()); } void ReplaceNode(SDNode *Old, const SDValue *New) { DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Old, i), New[i]); + if (UpdatedNodes) + UpdatedNodes->insert(New->getNode()); ReplacedNode(Old); } }; @@ -213,40 +237,6 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); } -SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) - : SelectionDAG::DAGUpdateListener(dag), - TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), - DAG(dag) { -} - -void SelectionDAGLegalize::LegalizeDAG() { - DAG.AssignTopologicalOrder(); - - // Visit all the nodes. We start in topological order, so that we see - // nodes with their original operands intact. Legalization can produce - // new nodes which may themselves need to be legalized. Iterate until all - // nodes have been legalized. - for (;;) { - bool AnyLegalized = false; - for (LegalizePosition = DAG.allnodes_end(); - LegalizePosition != DAG.allnodes_begin(); ) { - --LegalizePosition; - - SDNode *N = LegalizePosition; - if (LegalizedNodes.insert(N)) { - AnyLegalized = true; - LegalizeOp(N); - } - } - if (!AnyLegalized) - break; - - } - - // Remove dead nodes now. - DAG.RemoveDeadNodes(); -} - /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. SDValue @@ -928,6 +918,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(RVal.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); + if (UpdatedNodes) { + UpdatedNodes->insert(RVal.getNode()); + UpdatedNodes->insert(RChain.getNode()); + } ReplacedNode(Node); } return; @@ -1148,6 +1142,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(Value.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Value.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } ReplacedNode(Node); } } @@ -1335,10 +1333,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } if (NewNode != Node) { - DAG.ReplaceAllUsesWith(Node, NewNode); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); - ReplacedNode(Node); + ReplaceNode(Node, NewNode); Node = NewNode; } switch (Action) { @@ -1356,12 +1351,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { else ResultVals.push_back(Res.getValue(i)); } - if (Res.getNode() != Node || Res.getResNo() != 0) { - DAG.ReplaceAllUsesWith(Node, ResultVals.data()); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); - ReplacedNode(Node); - } + if (Res.getNode() != Node || Res.getResNo() != 0) + ReplaceNode(Node, ResultVals.data()); return; } } @@ -4179,6 +4170,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // use the new one. DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Tmp2.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } ReplacedNode(Node); break; } @@ -4285,7 +4280,48 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // SelectionDAG::Legalize - This is the entry point for the file. // void SelectionDAG::Legalize() { - /// run - This is the main entry point to this class. - /// - SelectionDAGLegalize(*this).LegalizeDAG(); + AssignTopologicalOrder(); + + allnodes_iterator LegalizePosition; + SmallPtrSet LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizePosition, LegalizedNodes); + + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. + for (;;) { + bool AnyLegalized = false; + for (LegalizePosition = allnodes_end(); + LegalizePosition != allnodes_begin(); ) { + --LegalizePosition; + + SDNode *N = LegalizePosition; + if (LegalizedNodes.insert(N)) { + AnyLegalized = true; + Legalizer.LegalizeOp(N); + } + } + if (!AnyLegalized) + break; + + } + + // Remove dead nodes now. + RemoveDeadNodes(); +} + +bool SelectionDAG::LegalizeOp(SDNode *N, + SmallSetVector &UpdatedNodes) { + allnodes_iterator LegalizePosition(N); + SmallPtrSet LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizePosition, LegalizedNodes, + &UpdatedNodes); + + // Directly insert the node in question, and legalize it. This will recurse + // as needed through operands. + LegalizedNodes.insert(N); + Legalizer.LegalizeOp(N); + + return LegalizedNodes.count(N); } diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll index 235045aaaaa..62d68f473ea 100644 --- a/test/CodeGen/R600/fp_to_sint.ll +++ b/test/CodeGen/R600/fp_to_sint.ll @@ -49,8 +49,8 @@ define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac ; EG-DAG: XOR_INT ; EG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; Check that the compiler doesn't crash with a "cannot select" error ; SI: S_ENDPGM @@ -81,8 +81,8 @@ entry: ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -102,8 +102,8 @@ entry: ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; SI: S_ENDPGM define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { @@ -132,8 +132,8 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -153,8 +153,8 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -174,8 +174,8 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -195,8 +195,8 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; SI: S_ENDPGM define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll index a13018bdfec..90f42f42448 100644 --- a/test/CodeGen/R600/fp_to_uint.ll +++ b/test/CodeGen/R600/fp_to_uint.ll @@ -52,8 +52,8 @@ define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac ; EG-DAG: XOR_INT ; EG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; SI: S_ENDPGM define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) { @@ -82,8 +82,8 @@ define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) { ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -103,8 +103,8 @@ define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) { ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; SI: S_ENDPGM define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { @@ -133,8 +133,8 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -154,8 +154,8 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -175,8 +175,8 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; EG-DAG: AND_INT ; EG-DAG: LSHR ; EG-DAG: SUB_INT @@ -196,8 +196,8 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { ; EG-DAG: XOR_INT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT -; EG-DAG: CNDGE_INT -; EG-DAG: CNDGE_INT +; EG-DAG: CNDE_INT +; EG-DAG: CNDE_INT ; SI: S_ENDPGM define void @fp_to_uint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { diff --git a/test/CodeGen/R600/setcc-equivalent.ll b/test/CodeGen/R600/setcc-equivalent.ll index f796748fcef..e21cecf0534 100644 --- a/test/CodeGen/R600/setcc-equivalent.ll +++ b/test/CodeGen/R600/setcc-equivalent.ll @@ -1,5 +1,4 @@ ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s -; XFAIL: * ; EG-LABEL: @and_setcc_setcc_i32 ; EG: AND_INT @@ -28,4 +27,4 @@ define void @and_setcc_setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, < %ext = sext <4 x i1> %and to <4 x i32> store <4 x i32> %ext, <4 x i32> addrspace(1)* %out, align 4 ret void -} \ No newline at end of file +} diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll index 47e50a93796..a44cb9d02f2 100644 --- a/test/CodeGen/X86/avx512-cmp.ll +++ b/test/CodeGen/X86/avx512-cmp.ll @@ -28,10 +28,9 @@ l2: ret float %c1 } +; FIXME: Can use vcmpeqss and extract from the mask here in AVX512. ; CHECK-LABEL: test3 -; CHECK: vcmpeqss -; CHECK: kmov -; CHECK: ret +; CHECK: vucomiss {{.*}}encoding: [0x62 define i32 @test3(float %a, float %b) { %cmp10.i = fcmp oeq float %a, %b -- 2.40.0