/// During bottom-up matching, match only nodes with at least this value as
/// the ratio of their common descendants.
- double MinSimilarity = 0.2;
+ double MinSimilarity = 0.5;
/// Whenever two subtrees are matched in the bottom-up phase, the optimal
/// mapping is computed, unless the size of either subtrees exceeds this.
int MaxSize = 100;
+ bool StopAfterTopDown = false;
+
/// Returns false if the nodes should never be matched.
bool isMatchingAllowed(const Node &N1, const Node &N2) const {
return N1.getType().isSame(N2.getType());
// Computes the ratio of common descendants between the two nodes.
// Descendants are only considered to be equal when they are mapped in M.
- double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const;
+ double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const;
// Returns the node that has the highest degree of similarity.
NodeId findCandidate(const Mapping &M, NodeId Id1) const;
void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1,
NodeId Id2) const {
- if (std::max(T1.getNumberOfDescendants(Id1),
- T2.getNumberOfDescendants(Id2)) >= Options.MaxSize)
+ if (std::max(T1.getNumberOfDescendants(Id1), T2.getNumberOfDescendants(Id2)) >
+ Options.MaxSize)
return;
ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2);
std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes();
}
}
-double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1,
- NodeId Id2) const {
- if (Id1.isInvalid() || Id2.isInvalid())
- return 0.0;
+double ASTDiff::Impl::getJaccardSimilarity(const Mapping &M, NodeId Id1,
+ NodeId Id2) const {
int CommonDescendants = 0;
const Node &N1 = T1.getNode(Id1);
- for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id)
- CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2));
- return 2.0 * CommonDescendants /
- (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2));
+ // Count the common descendants, excluding the subtree root.
+ for (NodeId Src = Id1 + 1; Src <= N1.RightMostDescendant; ++Src) {
+ NodeId Dst = M.getDst(Src);
+ CommonDescendants += int(Dst.isValid() && T2.isInSubtree(Dst, Id2));
+ }
+ // We need to subtract 1 to get the number of descendants excluding the root.
+ double Denominator = T1.getNumberOfDescendants(Id1) - 1 +
+ T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants;
+ // CommonDescendants is less than the size of one subtree.
+ assert(Denominator >= 0 && "Expected non-negative denominator.");
+ if (Denominator == 0)
+ return 0;
+ return CommonDescendants / Denominator;
}
NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const {
continue;
if (M.hasDst(Id2))
continue;
- double Similarity = getSimilarity(M, Id1, Id2);
+ double Similarity = getJaccardSimilarity(M, Id1, Id2);
if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) {
HighestSimilarity = Similarity;
Candidate = Id2;
}
break;
}
- const Node &N1 = T1.getNode(Id1);
bool Matched = M.hasSrc(Id1);
+ const Node &N1 = T1.getNode(Id1);
bool MatchedChildren =
std::any_of(N1.Children.begin(), N1.Children.end(),
[&](NodeId Child) { return M.hasSrc(Child); });
void ASTDiff::Impl::computeMapping() {
TheMapping = matchTopDown();
+ if (Options.StopAfterTopDown)
+ return;
matchBottomUp(TheMapping);
}
--- /dev/null
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied.
+
+#ifndef DEST
+
+void f1() { ; {{;}} }
+void f2() { ;; {{;}} }
+
+#else
+
+// Jaccard similarity threshold is 0.5.
+
+void f1() {
+// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5
+// Jaccard similarity = 3 / (4 + 5 - 3) = 3 / 6 >= 0.5
+// CHECK: Match FunctionDecl: f1(void (void))(1) to FunctionDecl: f1(void (void))(1)
+// CHECK: Match CompoundStmt(2) to CompoundStmt(2)
+// CHECK: Match CompoundStmt(4) to CompoundStmt(3)
+// CHECK: Match CompoundStmt(5) to CompoundStmt(4)
+// CHECK: Match NullStmt(6) to NullStmt(5)
+ {{;}} ;;
+}
+
+void f2() {
+// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5
+// Jaccard similarity = 3 / (5 + 6 - 3) = 3 / 8 < 0.5
+// CHECK-NOT: Match FunctionDecl(9)
+// CHECK-NOT: Match CompoundStmt(10)
+// CHECK: Match CompoundStmt(11) to CompoundStmt(10)
+// CHECK: Match CompoundStmt(12) to CompoundStmt(11)
+// CHECK: Match NullStmt(13) to NullStmt(12)
+// CHECK-NOT: Match NullStmt(13)
+ {{;}} ;;;
+}
+
+#endif
--- /dev/null
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the behaviour of the matching according to the optimal tree edit
+// distance, implemented with Zhang and Shasha's algorithm.
+// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than
+// this size will not be processed by the optimal algorithm.
+
+#ifndef DEST
+
+void f1() { {;} {{;}} }
+
+void f2() { {;} {{;;;;;}} }
+
+void f3() { {;} {{;;;;;;}} }
+
+#else
+
+void f1() {
+// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5
+// The optimal matching algorithm should move the ; into the outer block
+// CHECK: Match CompoundStmt(2) to CompoundStmt(2)
+// CHECK-NOT: Match CompoundStmt(3)
+// CHECK-NEXT: Match NullStmt(4) to NullStmt(3)
+ ; {{;}}
+}
+
+void f2() {
+ // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5
+ // As none of the subtrees is bigger than 10 nodes, the optimal algorithm
+ // will be run.
+ // CHECK: Match NullStmt(11) to NullStmt(9)
+ ;; {{;;;;;}}
+}
+
+void f3() {
+ // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5
+ // As the subtrees are bigger than 10 nodes, the optimal algorithm will not
+ // be run.
+ // CHECK: Delete NullStmt(22)
+ ;; {{;;;;;;}}
+}
+#endif
--- /dev/null
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the top-down matching of identical subtrees only.
+
+#ifndef DEST
+
+void f1()
+{
+ // Match some subtree of height greater than 2.
+ // CHECK: Match CompoundStmt(3) to CompoundStmt(3)
+ // CHECK: Match CompoundStmt(4) to CompoundStmt(4)
+ // CHECK: Match NullStmt(5) to NullStmt(5)
+ {{;}}
+
+ // Don't match subtrees that are smaller.
+ // CHECK-NOT: Match CompoundStmt(6)
+ // CHECK-NOT: Match NullStmt(7)
+ {;}
+
+ // Greedy approach - use the first matching subtree when there are multiple
+ // identical subtrees.
+ // CHECK: Match CompoundStmt(8) to CompoundStmt(8)
+ // CHECK: Match CompoundStmt(9) to CompoundStmt(9)
+ // CHECK: Match NullStmt(10) to NullStmt(10)
+ {{;;}}
+}
+
+#else
+
+void f1() {
+
+ {{;}}
+
+ {;}
+
+ {{;;}}
+ // CHECK-NOT: Match {{.*}} to CompoundStmt(11)
+ // CHECK-NOT: Match {{.*}} to CompoundStmt(12)
+ // CHECK-NOT: Match {{.*}} to NullStmt(13)
+ {{;;}}
+
+ // CHECK-NOT: Match {{.*}} to NullStmt(14)
+ ;
+}
+
+#endif
cl::Optional,
cl::cat(ClangDiffCategory));
+static cl::opt<std::string> StopAfter("stop-after",
+ cl::desc("<topdown|bottomup>"),
+ cl::Optional, cl::init(""),
+ cl::cat(ClangDiffCategory));
+
static cl::opt<int> MaxSize("s", cl::desc("<maxsize>"), cl::Optional,
cl::init(-1), cl::cat(ClangDiffCategory));
diff::ComparisonOptions Options;
if (MaxSize != -1)
Options.MaxSize = MaxSize;
+ if (!StopAfter.empty()) {
+ if (StopAfter == "topdown")
+ Options.StopAfterTopDown = true;
+ else if (StopAfter != "bottomup") {
+ llvm::errs() << "Error: Invalid argument for -stop-after\n";
+ return 1;
+ }
+ }
diff::SyntaxTree SrcTree(Src->getASTContext());
diff::SyntaxTree DstTree(Dst->getASTContext());
diff::ASTDiff Diff(SrcTree, DstTree, Options);