From: Nirav Dave Date: Wed, 2 Aug 2017 16:35:58 +0000 (+0000) Subject: [DAG] Improve candidate pruning in store merge failure case. NFCI X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=29e9cdbd0d70fac18c2e6c652b3e9cdc6ceae064;p=llvm [DAG] Improve candidate pruning in store merge failure case. NFCI During store merge we construct a sorted list of consecutive store candidates and consider subsequences for merging into a single store. For each subsequence we check if the stored value type is legal, if the merged store would be valid and fast, and if the constructed value to be stored is valid. The only properties that affect this check between subsequences are the size of the subsequence, the alignment of the first store, the alignment of the stored load value (when merging stores-of-loads), and whether the merged value is a constant zero. If we do not find a viable mergeable subsequence starting from the first store of length N, we know that a subsequence starting at a later store of length N will also fail unless the new store's alignment or the new load's alignment (if we're merging stores-of-loads) has improved, or we've dropped stores of nonzero value and could construct a merged store of zero (for merging constants). As a result, if we fail to find a valid subsequence starting from the first store we can safely skip considering subsequences that start with subsequent stores unless one of the above conditions holds. This significantly (2x) improves compile time in some pathological cases. 
Reviewers: RKSimon, efriedma, zvi, spatel, waltl Subscribers: grandinj, llvm-commits Differential Revision: https://reviews.llvm.org/D35901 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309830 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 59dc714d2ea..35a718e1fbf 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12803,19 +12803,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { unsigned LastLegalVectorType = 1; bool LastIntegerTrunc = false; bool NonZero = false; + unsigned FirstZeroAfterNonZero = NumConsecutiveStores; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *ST = cast(StoreNodes[i].MemNode); SDValue StoredVal = ST->getValue(); - - if (ConstantSDNode *C = dyn_cast(StoredVal)) { - NonZero |= !C->isNullValue(); - } else if (ConstantFPSDNode *C = - dyn_cast(StoredVal)) { - NonZero |= !C->getConstantFPValue()->isNullValue(); - } else { - // Non-constant. - break; + bool IsElementZero = false; + if (ConstantSDNode *C = dyn_cast(StoredVal)) + IsElementZero = C->isNullValue(); + else if (ConstantFPSDNode *C = dyn_cast(StoredVal)) + IsElementZero = C->getConstantFPValue()->isNullValue(); + if (IsElementZero) { + if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) + FirstZeroAfterNonZero = i; } + NonZero |= !IsElementZero; // Find a legal type for the constant store. unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; @@ -12861,23 +12862,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } } + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + // Check if we found a legal integer type that creates a meaningful merge. 
- if (LastLegalType < 2 && LastLegalVectorType < 2) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved or we've dropped a non-zero value. Drop as many + // candidates as we can here. + unsigned NumSkip = 1; + while ( + (NumSkip < NumConsecutiveStores) && + (NumSkip < FirstZeroAfterNonZero) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) { + NumSkip++; + } + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); continue; } - bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; - unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - bool Merged = MergeStoresOfConstantsOrVecElts( StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); - if (!Merged) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - continue; - } + RV |= Merged; + // Remove merged stores for next iteration. - RV = true; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); continue; } @@ -12914,6 +12926,23 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { NumStoresToMerge = i + 1; } + // Check if we found a legal integer type that creates a meaningful merge. + if (NumStoresToMerge < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved. Drop as many candidates as we can here. 
+ unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + continue; + } + bool Merged = MergeStoresOfConstantsOrVecElts( StoreNodes, MemVT, NumStoresToMerge, false, true, false); if (!Merged) { @@ -13081,7 +13110,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { NumElem = std::min(LastLegalType, NumElem); if (NumElem < 2) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have is if the alignment or either + // the load or store has improved. Drop as many candidates as we + // can here. + unsigned NumSkip = 1; + while ((NumSkip < LoadNodes.size()) && + (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); continue; }