From 972c81b9ec5e4b5a19c18dc9a45bf95d5ba6c857 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Sun, 9 Jul 2017 21:14:36 +0000 Subject: [PATCH] [analyzer] Faster hashing of subsequences in CompoundStmts. Summary: This patches improves the hashing subsequences in CompoundStmts by incrementally hashing all subsequences with the same starting position. This results in a reduction of the time for this constraint while running over SQLite from 1.10 seconds to 0.55 seconds (-50%). Reviewers: NoQ Reviewed By: NoQ Subscribers: cfe-commits, xazax.hun, v.g.vassilev Differential Revision: https://reviews.llvm.org/D34364 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@307509 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/CloneDetection.cpp | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/lib/Analysis/CloneDetection.cpp b/lib/Analysis/CloneDetection.cpp index e698d3e5c5..5ea74989a7 100644 --- a/lib/Analysis/CloneDetection.cpp +++ b/lib/Analysis/CloneDetection.cpp @@ -239,16 +239,27 @@ size_t RecursiveCloneTypeIIConstraint::saveHash( } if (CS) { - for (unsigned Length = 2; Length <= CS->size(); ++Length) { - for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) { - llvm::MD5 Hash; - for (unsigned i = Pos; i < Pos + Length; ++i) { - size_t ChildHash = ChildHashes[i]; - Hash.update(StringRef(reinterpret_cast(&ChildHash), - sizeof(ChildHash))); + // If we're in a CompoundStmt, we hash all possible combinations of child + // statements to find clones in those subsequences. + // We first go through every possible starting position of a subsequence. + for (unsigned Pos = 0; Pos < CS->size(); ++Pos) { + // Then we try all possible lengths this subsequence could have and + // reuse the same hash object to make sure we only hash every child + // hash exactly once. + llvm::MD5 Hash; + for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) { + // Grab the current child hash and put it into our hash. We do + // -1 on the index because we start counting the length at 1. + size_t ChildHash = ChildHashes[Pos + Length - 1]; + Hash.update( + StringRef(reinterpret_cast(&ChildHash), sizeof(ChildHash))); + // If we have at least two elements in our subsequence, we can start + // saving it. + if (Length > 1) { + llvm::MD5 SubHash = Hash; + StmtsByHash.push_back(std::make_pair( + createHash(SubHash), StmtSequence(CS, D, Pos, Pos + Length))); } - StmtsByHash.push_back(std::make_pair( - createHash(Hash), StmtSequence(CS, D, Pos, Pos + Length))); } } } -- 2.40.0