From: Clement Courbet Date: Wed, 2 Jan 2019 09:21:00 +0000 (+0000) Subject: Revert rL350035 "[llvm-exegesis] Clustering: don't enqueue a point multiple times" X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=14e0643d98eb35e326259bf79007170567d568fd;p=llvm Revert rL350035 "[llvm-exegesis] Clustering: don't enqueue a point multiple times" Let's discuss this on the review thread before submitting. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350207 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/tools/llvm-exegesis/lib/Clustering.cpp b/tools/llvm-exegesis/lib/Clustering.cpp index 56b1a939c41..b2cd97c12eb 100644 --- a/tools/llvm-exegesis/lib/Clustering.cpp +++ b/tools/llvm-exegesis/lib/Clustering.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Clustering.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include @@ -91,14 +92,8 @@ llvm::Error InstructionBenchmarkClustering::validateAndSetup() { } void InstructionBenchmarkClustering::dbScan(const size_t MinPts) { - const size_t NumPoints = Points_.size(); - - // Persistent buffers to avoid allocs. - std::vector Neighbors; - std::vector ToProcess(NumPoints); - std::vector Processed(NumPoints); - - for (size_t P = 0; P < NumPoints; ++P) { + std::vector Neighbors; // Persistent buffer to avoid allocs. + for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { if (!ClusterIdForPoint_[P].isUndef()) continue; // Previously processed in inner loop. rangeQuery(P, Neighbors); @@ -114,40 +109,43 @@ void InstructionBenchmarkClustering::dbScan(const size_t MinPts) { Cluster &CurrentCluster = Clusters_.back(); ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */ CurrentCluster.PointIndices.push_back(P); - Processed[P] = 1; - // Enqueue P's neighbors. 
- size_t Tail = 0; - auto EnqueueUnprocessed = [&](const std::vector &Neighbors) { - for (size_t Q : Neighbors) - if (!Processed[Q]) { - ToProcess[Tail++] = Q; - Processed[Q] = 1; - } - }; - EnqueueUnprocessed(Neighbors); - - for (size_t Head = 0; Head < Tail; ++Head) { - // Retrieve a point from the queue and add it to the current cluster. - P = ToProcess[Head]; - ClusterId OldCID = ClusterIdForPoint_[P]; - ClusterIdForPoint_[P] = CurrentCluster.Id; - CurrentCluster.PointIndices.push_back(P); - if (OldCID.isNoise()) + // Process P's neighbors. + llvm::SetVector> ToProcess; + ToProcess.insert(Neighbors.begin(), Neighbors.end()); + while (!ToProcess.empty()) { + // Retrieve a point from the set. + const size_t Q = *ToProcess.begin(); + ToProcess.erase(ToProcess.begin()); + + if (ClusterIdForPoint_[Q].isNoise()) { + // Change noise point to border point. + ClusterIdForPoint_[Q] = CurrentCluster.Id; + CurrentCluster.PointIndices.push_back(Q); continue; - assert(OldCID.isUndef()); - - // And extend to the neighbors of P if the region is dense enough. - rangeQuery(P, Neighbors); - if (Neighbors.size() + 1 >= MinPts) - EnqueueUnprocessed(Neighbors); + } + if (!ClusterIdForPoint_[Q].isUndef()) { + continue; // Previously processed. + } + // Add Q to the current cluster. + ClusterIdForPoint_[Q] = CurrentCluster.Id; + CurrentCluster.PointIndices.push_back(Q); + // And extend to the neighbors of Q if the region is dense enough. + rangeQuery(Q, Neighbors); + if (Neighbors.size() + 1 >= MinPts) { + ToProcess.insert(Neighbors.begin(), Neighbors.end()); + } } } + // assert(Neighbors.capacity() == (Points_.size() - 1)); + // ^ True, but it is not guaranteed to be true in all the cases. // Add noisy points to noise cluster. - for (size_t P = 0; P < NumPoints; ++P) - if (ClusterIdForPoint_[P].isNoise()) + for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { + if (ClusterIdForPoint_[P].isNoise()) { NoiseCluster_.PointIndices.push_back(P); + } + } } llvm::Expected