namespace clang {
namespace diff {
+/// This represents a match between two nodes in the source and destination
+/// trees, meaning that they are likely to be related.
+struct Match {
+ NodeId Src, Dst;
+};
+
enum ChangeKind {
- None,
- Delete, // (Src): delete node Src.
- Update, // (Src, Dst): update the value of node Src to match Dst.
- Insert, // (Src, Dst, Pos): insert Src as child of Dst at offset Pos.
- Move, // (Src, Dst, Pos): move Src to be a child of Dst at offset Pos.
- UpdateMove // Same as Move plus Update.
+ Delete, // (Src): delete node Src.
+ Update, // (Src, Dst): update the value of node Src to match Dst.
+ Insert, // (Src, Dst, Pos): insert Src as child of Dst at offset Pos.
+ Move // (Src, Dst, Pos): move Src to be a child of Dst at offset Pos.
+};
+
+struct Change {
+ ChangeKind Kind;
+ NodeId Src, Dst;
+ size_t Position;
+
+ Change(ChangeKind Kind, NodeId Src, NodeId Dst, size_t Position)
+ : Kind(Kind), Src(Src), Dst(Dst), Position(Position) {}
+ Change(ChangeKind Kind, NodeId Src) : Kind(Kind), Src(Src) {}
+ Change(ChangeKind Kind, NodeId Src, NodeId Dst)
+ : Kind(Kind), Src(Src), Dst(Dst) {}
};
/// Represents a Clang AST node, alongside some additional information.
struct Node {
NodeId Parent, LeftMostDescendant, RightMostDescendant;
- int Depth, Height, Shift = 0;
+ int Depth, Height;
ast_type_traits::DynTypedNode ASTNode;
SmallVector<NodeId, 4> Children;
- ChangeKind ChangeKind = None;
ast_type_traits::ASTNodeKind getType() const;
StringRef getTypeLabel() const;
ASTDiff(SyntaxTree &Src, SyntaxTree &Dst, const ComparisonOptions &Options);
~ASTDiff();
- // Returns the ID of the node that is mapped to the given node in SourceTree.
- NodeId getMapped(const SyntaxTree &SourceTree, NodeId Id) const;
+ // Returns a list of matches.
+ std::vector<Match> getMatches();
+ /// Returns an edit script.
+ std::vector<Change> getChanges();
+
+ // Prints an edit action.
+ void printChange(raw_ostream &OS, const Change &Chg) const;
+ // Prints a match between two nodes.
+ void printMatch(raw_ostream &OS, const Match &M) const;
class Impl;
const ASTContext &getASTContext() const;
StringRef getFilename() const;
- int getSize() const;
- NodeId getRootId() const;
- using PreorderIterator = NodeId;
- PreorderIterator begin() const;
- PreorderIterator end() const;
-
const Node &getNode(NodeId Id) const;
- int findPositionInParent(NodeId Id) const;
+ NodeId getRootId() const;
// Returns the starting and ending offset of the node in its source file.
std::pair<unsigned, unsigned> getSourceRangeOffsets(const Node &N) const;
/// Serialize the node attributes to a string representation. This should
/// uniquely distinguish nodes of the same kind. Note that this function just
/// returns a representation of the node value, not considering descendants.
- std::string getNodeValue(NodeId Id) const;
- std::string getNodeValue(const Node &Node) const;
+ std::string getNodeValue(const DynTypedNode &DTN) const;
class Impl;
std::unique_ptr<Impl> TreeImpl;
bool EnableMatchingWithUnmatchableParents = false;
/// Returns false if the nodes should never be matched.
- bool isMatchingAllowed(const Node &N1, const Node &N2) const {
- return N1.getType().isSame(N2.getType());
+ bool isMatchingAllowed(const DynTypedNode &N1, const DynTypedNode &N2) const {
+ return N1.getNodeKind().isSame(N2.getNodeKind());
}
};
class ASTDiff::Impl {
public:
SyntaxTree::Impl &T1, &T2;
+ bool IsMappingDone = false;
Mapping TheMapping;
Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2,
- const ComparisonOptions &Options);
+ const ComparisonOptions &Options)
+ : T1(T1), T2(T2), Options(Options) {}
/// Matches nodes one-by-one based on their similarity.
void computeMapping();
- // Compute ChangeKind for each node based on similarity.
- void computeChangeKinds(Mapping &M);
+ std::vector<Match> getMatches(Mapping &M);
- NodeId getMapped(const std::unique_ptr<SyntaxTree::Impl> &Tree,
- NodeId Id) const {
- if (&*Tree == &T1)
- return TheMapping.getDst(Id);
- assert(&*Tree == &T2 && "Invalid tree.");
- return TheMapping.getSrc(Id);
- }
+ /// Finds an edit script that converts T1 to T2.
+ std::vector<Change> computeChanges(Mapping &M);
+
+ void printChangeImpl(raw_ostream &OS, const Change &Chg) const;
+ void printMatchImpl(raw_ostream &OS, const Match &M) const;
+
+ // Returns a mapping of identical subtrees.
+ Mapping matchTopDown() const;
private:
// Returns true if the two subtrees are identical.
// Returns false if the nodes must not be mached.
bool isMatchingPossible(NodeId Id1, NodeId Id2) const;
- // Returns true if the nodes' parents are matched.
- bool haveSameParents(const Mapping &M, NodeId Id1, NodeId Id2) const;
-
// Uses an optimal albeit slow algorithm to compute a mapping between two
// subtrees, but only if both have fewer nodes than MaxSize.
void addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const;
// Returns the node that has the highest degree of similarity.
NodeId findCandidate(const Mapping &M, NodeId Id1) const;
- // Returns a mapping of identical subtrees.
- Mapping matchTopDown() const;
-
// Tries to match any yet unmapped nodes, in a bottom-up fashion.
void matchBottomUp(Mapping &M) const;
std::vector<NodeId> Leaves;
// Maps preorder indices to postorder ones.
std::vector<int> PostorderIds;
- std::vector<NodeId> NodesBfs;
int getSize() const { return Nodes.size(); }
NodeId getRootId() const { return 0; }
- PreorderIterator begin() const { return getRootId(); }
- PreorderIterator end() const { return getSize(); }
const Node &getNode(NodeId Id) const { return Nodes[Id]; }
Node &getMutableNode(NodeId Id) { return Nodes[Id]; }
void addNode(Node &N) { Nodes.push_back(N); }
int getNumberOfDescendants(NodeId Id) const;
bool isInSubtree(NodeId Id, NodeId SubtreeRoot) const;
- int findPositionInParent(NodeId Id, bool Shifted = false) const;
std::string getNodeValue(NodeId Id) const;
- std::string getNodeValue(const Node &Node) const;
+ std::string getNodeValue(const DynTypedNode &DTN) const;
+ /// Prints the node as "<type>[: <value>](<postorder-id)"
+ void printNode(NodeId Id) const { printNode(llvm::outs(), Id); }
+ void printNode(raw_ostream &OS, NodeId Id) const;
+
+ void printTree() const;
+ void printTree(NodeId Root) const;
+ void printTree(raw_ostream &OS, NodeId Root) const;
private:
/// Nodes in preorder.
initTree();
}
-static std::vector<NodeId> getSubtreePostorder(const SyntaxTree::Impl &Tree,
- NodeId Root) {
- std::vector<NodeId> Postorder;
- std::function<void(NodeId)> Traverse = [&](NodeId Id) {
- const Node &N = Tree.getNode(Id);
- for (NodeId Child : N.Children)
- Traverse(Child);
- Postorder.push_back(Id);
- };
- Traverse(Root);
- return Postorder;
-}
-
-static std::vector<NodeId> getSubtreeBfs(const SyntaxTree::Impl &Tree,
- NodeId Root) {
- std::vector<NodeId> Ids;
- size_t Expanded = 0;
- Ids.push_back(Root);
- while (Expanded < Ids.size())
- for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children)
- Ids.push_back(Child);
- return Ids;
-}
-
void SyntaxTree::Impl::initTree() {
setLeftMostDescendants();
int PostorderId = 0;
++PostorderId;
};
PostorderTraverse(getRootId());
- NodesBfs = getSubtreeBfs(*this, getRootId());
}
void SyntaxTree::Impl::setLeftMostDescendants() {
}
}
+static std::vector<NodeId> getSubtreePostorder(const SyntaxTree::Impl &Tree,
+ NodeId Root) {
+ std::vector<NodeId> Postorder;
+ std::function<void(NodeId)> Traverse = [&](NodeId Id) {
+ const Node &N = Tree.getNode(Id);
+ for (NodeId Child : N.Children)
+ Traverse(Child);
+ Postorder.push_back(Id);
+ };
+ Traverse(Root);
+ return Postorder;
+}
+
+static std::vector<NodeId> getSubtreeBfs(const SyntaxTree::Impl &Tree,
+ NodeId Root) {
+ std::vector<NodeId> Ids;
+ size_t Expanded = 0;
+ Ids.push_back(Root);
+ while (Expanded < Ids.size())
+ for (NodeId Child : Tree.getNode(Ids[Expanded++]).Children)
+ Ids.push_back(Child);
+ return Ids;
+}
+
int SyntaxTree::Impl::getNumberOfDescendants(NodeId Id) const {
return getNode(Id).RightMostDescendant - Id + 1;
}
return Id >= SubtreeRoot && Id <= getNode(SubtreeRoot).RightMostDescendant;
}
-int SyntaxTree::Impl::findPositionInParent(NodeId Id, bool Shifted) const {
- NodeId Parent = getNode(Id).Parent;
- if (Parent.isInvalid())
- return 0;
- const auto &Siblings = getNode(Parent).Children;
- int Position = 0;
- for (size_t I = 0, E = Siblings.size(); I < E; ++I) {
- if (Shifted)
- Position += getNode(Siblings[I]).Shift;
- if (Siblings[I] == Id) {
- Position += I;
- return Position;
- }
- }
- llvm_unreachable("Node not found in parent's children.");
-}
-
std::string SyntaxTree::Impl::getNodeValue(NodeId Id) const {
- return getNodeValue(getNode(Id));
+ return getNodeValue(getNode(Id).ASTNode);
}
-std::string SyntaxTree::Impl::getNodeValue(const Node &N) const {
- const DynTypedNode &DTN = N.ASTNode;
+std::string SyntaxTree::Impl::getNodeValue(const DynTypedNode &DTN) const {
if (auto *X = DTN.get<BinaryOperator>())
return X->getOpcodeStr();
if (auto *X = DTN.get<AccessSpecDecl>()) {
llvm_unreachable("Fatal: unhandled AST node.\n");
}
+void SyntaxTree::Impl::printTree() const { printTree(getRootId()); }
+void SyntaxTree::Impl::printTree(NodeId Root) const {
+ printTree(llvm::outs(), Root);
+}
+
+void SyntaxTree::Impl::printTree(raw_ostream &OS, NodeId Root) const {
+ const Node &N = getNode(Root);
+ for (int I = 0; I < N.Depth; ++I)
+ OS << " ";
+ printNode(OS, Root);
+ OS << "\n";
+ for (NodeId Child : N.Children)
+ printTree(OS, Child);
+}
+
+void SyntaxTree::Impl::printNode(raw_ostream &OS, NodeId Id) const {
+ if (Id.isInvalid()) {
+ OS << "None";
+ return;
+ }
+ OS << getNode(Id).getTypeLabel();
+ if (getNodeValue(Id) != "")
+ OS << ": " << getNodeValue(Id);
+ OS << "(" << PostorderIds[Id] << ")";
+}
+
/// Identifies a node in a subtree by its postorder offset, starting at 1.
struct SNodeId {
int Id = 0;
}
bool ASTDiff::Impl::isMatchingPossible(NodeId Id1, NodeId Id2) const {
- return Options.isMatchingAllowed(T1.getNode(Id1), T2.getNode(Id2));
-}
-
-bool ASTDiff::Impl::haveSameParents(const Mapping &M, NodeId Id1,
- NodeId Id2) const {
- NodeId P1 = T1.getNode(Id1).Parent;
- NodeId P2 = T2.getNode(Id2).Parent;
- return (P1.isInvalid() && P2.isInvalid()) ||
- (P1.isValid() && P2.isValid() && M.getDst(P1) == P2);
+ return Options.isMatchingAllowed(T1.getNode(Id1).ASTNode,
+ T2.getNode(Id2).ASTNode);
}
void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1,
NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const {
NodeId Candidate;
double HighestSimilarity = 0.0;
- for (NodeId Id2 : T2) {
+ for (NodeId Id2 = 0, E = T2.getSize(); Id2 < E; ++Id2) {
if (!isMatchingPossible(Id1, Id2))
continue;
if (M.hasDst(Id2))
return M;
}
-ASTDiff::Impl::Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2,
- const ComparisonOptions &Options)
- : T1(T1), T2(T2), Options(Options) {
- computeMapping();
- computeChangeKinds(TheMapping);
-}
-
void ASTDiff::Impl::computeMapping() {
+ if (IsMappingDone)
+ return;
TheMapping = matchTopDown();
matchBottomUp(TheMapping);
+ IsMappingDone = true;
}
-void ASTDiff::Impl::computeChangeKinds(Mapping &M) {
- for (NodeId Id1 : T1) {
- if (!M.hasSrc(Id1)) {
- T1.getMutableNode(Id1).ChangeKind = Delete;
- T1.getMutableNode(Id1).Shift -= 1;
- }
- }
- for (NodeId Id2 : T2) {
- if (!M.hasDst(Id2)) {
- T2.getMutableNode(Id2).ChangeKind = Insert;
- T2.getMutableNode(Id2).Shift -= 1;
- }
- }
- for (NodeId Id1 : T1.NodesBfs) {
- NodeId Id2 = M.getDst(Id1);
- if (Id2.isInvalid())
- continue;
- if (!haveSameParents(M, Id1, Id2) ||
- T1.findPositionInParent(Id1, true) !=
- T2.findPositionInParent(Id2, true)) {
- T1.getMutableNode(Id1).Shift -= 1;
- T2.getMutableNode(Id2).Shift -= 1;
- }
- }
- for (NodeId Id2 : T2.NodesBfs) {
+std::vector<Match> ASTDiff::Impl::getMatches(Mapping &M) {
+ std::vector<Match> Matches;
+ for (NodeId Id1 = 0, Id2, E = T1.getSize(); Id1 < E; ++Id1)
+ if ((Id2 = M.getDst(Id1)).isValid())
+ Matches.push_back({Id1, Id2});
+ return Matches;
+}
+
+std::vector<Change> ASTDiff::Impl::computeChanges(Mapping &M) {
+ std::vector<Change> Changes;
+ for (NodeId Id2 : getSubtreeBfs(T2, T2.getRootId())) {
+ const Node &N2 = T2.getNode(Id2);
NodeId Id1 = M.getSrc(Id2);
- if (Id1.isInvalid())
- continue;
- Node &N1 = T1.getMutableNode(Id1);
- Node &N2 = T2.getMutableNode(Id2);
- if (Id1.isInvalid())
+ if (Id1.isValid()) {
+ assert(isMatchingPossible(Id1, Id2) && "Invalid matching.");
+ if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) {
+ Changes.emplace_back(Update, Id1, Id2);
+ }
continue;
- if (!haveSameParents(M, Id1, Id2) ||
- T1.findPositionInParent(Id1, true) !=
- T2.findPositionInParent(Id2, true)) {
- N1.ChangeKind = N2.ChangeKind = Move;
- }
- if (T1.getNodeValue(Id1) != T2.getNodeValue(Id2)) {
- N1.ChangeKind = N2.ChangeKind =
- (N1.ChangeKind == Move ? UpdateMove : Update);
}
- }
+ NodeId P2 = N2.Parent;
+ NodeId P1 = M.getSrc(P2);
+ assert(P1.isValid() &&
+ "Parents must be matched for determining the change type.");
+ Node &Parent1 = T1.getMutableNode(P1);
+ const Node &Parent2 = T2.getNode(P2);
+ auto &Siblings1 = Parent1.Children;
+ const auto &Siblings2 = Parent2.Children;
+ size_t Position;
+ for (Position = 0; Position < Siblings2.size(); ++Position)
+ if (Siblings2[Position] == Id2 || Position >= Siblings1.size())
+ break;
+ Changes.emplace_back(Insert, Id2, P2, Position);
+ Node PatchNode;
+ PatchNode.Parent = P1;
+ PatchNode.LeftMostDescendant = N2.LeftMostDescendant;
+ PatchNode.RightMostDescendant = N2.RightMostDescendant;
+ PatchNode.Depth = N2.Depth;
+ PatchNode.ASTNode = N2.ASTNode;
+ // TODO update Depth if needed
+ NodeId PatchNodeId = T1.getSize();
+ // TODO maybe choose a different data structure for Children.
+ Siblings1.insert(Siblings1.begin() + Position, PatchNodeId);
+ T1.addNode(PatchNode);
+ M.link(PatchNodeId, Id2);
+ }
+ for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) {
+ NodeId Id2 = M.getDst(Id1);
+ if (Id2.isInvalid())
+ Changes.emplace_back(Delete, Id1, Id2);
+ }
+ return Changes;
+}
+
+void ASTDiff::Impl::printChangeImpl(raw_ostream &OS, const Change &Chg) const {
+ switch (Chg.Kind) {
+ case Delete:
+ OS << "Delete ";
+ T1.printNode(OS, Chg.Src);
+ OS << "\n";
+ break;
+ case Update:
+ OS << "Update ";
+ T1.printNode(OS, Chg.Src);
+ OS << " to " << T2.getNodeValue(Chg.Dst) << "\n";
+ break;
+ case Insert:
+ OS << "Insert ";
+ T2.printNode(OS, Chg.Src);
+ OS << " into ";
+ T2.printNode(OS, Chg.Dst);
+ OS << " at " << Chg.Position << "\n";
+ break;
+ case Move:
+ llvm_unreachable("TODO");
+ break;
+ };
+}
+
+void ASTDiff::Impl::printMatchImpl(raw_ostream &OS, const Match &M) const {
+ OS << "Match ";
+ T1.printNode(OS, M.Src);
+ OS << " to ";
+ T2.printNode(OS, M.Dst);
+ OS << "\n";
}
ASTDiff::ASTDiff(SyntaxTree &T1, SyntaxTree &T2,
ASTDiff::~ASTDiff() = default;
-NodeId ASTDiff::getMapped(const SyntaxTree &SourceTree, NodeId Id) const {
- return DiffImpl->getMapped(SourceTree.TreeImpl, Id);
-}
-
SyntaxTree::SyntaxTree(const ASTContext &AST)
: TreeImpl(llvm::make_unique<SyntaxTree::Impl>(
this, AST.getTranslationUnitDecl(), AST)) {}
+std::vector<Match> ASTDiff::getMatches() {
+ DiffImpl->computeMapping();
+ return DiffImpl->getMatches(DiffImpl->TheMapping);
+}
+
+std::vector<Change> ASTDiff::getChanges() {
+ DiffImpl->computeMapping();
+ return DiffImpl->computeChanges(DiffImpl->TheMapping);
+}
+
+void ASTDiff::printChange(raw_ostream &OS, const Change &Chg) const {
+ DiffImpl->printChangeImpl(OS, Chg);
+}
+
+void ASTDiff::printMatch(raw_ostream &OS, const Match &M) const {
+ DiffImpl->printMatchImpl(OS, M);
+}
+
SyntaxTree::~SyntaxTree() = default;
const ASTContext &SyntaxTree::getASTContext() const { return TreeImpl->AST; }
return TreeImpl->getNode(Id);
}
-int SyntaxTree::getSize() const { return TreeImpl->getSize(); }
NodeId SyntaxTree::getRootId() const { return TreeImpl->getRootId(); }
-SyntaxTree::PreorderIterator SyntaxTree::begin() const {
- return TreeImpl->begin();
-}
-SyntaxTree::PreorderIterator SyntaxTree::end() const { return TreeImpl->end(); }
-int SyntaxTree::findPositionInParent(NodeId Id) const {
- return TreeImpl->findPositionInParent(Id);
-}
-
-std::pair<unsigned, unsigned>
-SyntaxTree::getSourceRangeOffsets(const Node &N) const {
+std::pair<unsigned, unsigned> SyntaxTree::getSourceRangeOffsets(const Node &N) const {
const SourceManager &SrcMgr = TreeImpl->AST.getSourceManager();
SourceRange Range = N.ASTNode.getSourceRange();
SourceLocation BeginLoc = Range.getBegin();
return {Begin, End};
}
-std::string SyntaxTree::getNodeValue(NodeId Id) const {
- return TreeImpl->getNodeValue(Id);
-}
-
-std::string SyntaxTree::getNodeValue(const Node &N) const {
- return TreeImpl->getNodeValue(N);
+std::string SyntaxTree::getNodeValue(const DynTypedNode &DTN) const {
+ return TreeImpl->getNodeValue(DTN);
}
} // end namespace diff
auto Offsets = Tree.getSourceRangeOffsets(N);
OS << R"(,"begin":)" << Offsets.first;
OS << R"(,"end":)" << Offsets.second;
- std::string Value = Tree.getNodeValue(N);
+ std::string Value = Tree.getNodeValue(N.ASTNode);
if (!Value.empty()) {
OS << R"(,"value":")";
printJsonString(OS, Value);
OS << "]}";
}
-static void printNode(raw_ostream &OS, diff::SyntaxTree &Tree,
- diff::NodeId Id) {
- if (Id.isInvalid()) {
- OS << "None";
- return;
- }
- OS << Tree.getNode(Id).getTypeLabel();
- std::string Value = Tree.getNodeValue(Id);
- if (!Value.empty())
- OS << ": " << Value;
- OS << "(" << Id << ")";
-}
-
-static void printDstChange(raw_ostream &OS, diff::ASTDiff &Diff,
- diff::SyntaxTree &SrcTree, diff::SyntaxTree &DstTree,
- diff::NodeId Dst) {
- const diff::Node &DstNode = DstTree.getNode(Dst);
- diff::NodeId Src = Diff.getMapped(DstTree, Dst);
- switch (DstNode.ChangeKind) {
- case diff::None:
- break;
- case diff::Delete:
- llvm_unreachable("The destination tree can't have deletions.");
- case diff::Update:
- OS << "Update ";
- printNode(OS, SrcTree, Src);
- OS << " to " << DstTree.getNodeValue(Dst) << "\n";
- break;
- case diff::Insert:
- case diff::Move:
- case diff::UpdateMove:
- if (DstNode.ChangeKind == diff::Insert)
- OS << "Insert";
- else if (DstNode.ChangeKind == diff::Move)
- OS << "Move";
- else if (DstNode.ChangeKind == diff::UpdateMove)
- OS << "Update and Move";
- OS << " ";
- printNode(OS, DstTree, Dst);
- OS << " into ";
- printNode(OS, DstTree, DstNode.Parent);
- OS << " at " << DstTree.findPositionInParent(Dst) << "\n";
- break;
- }
-}
-
int main(int argc, const char **argv) {
std::string ErrorMessage;
std::unique_ptr<CompilationDatabase> CommonCompilations =
Options.MaxSize = MaxSize;
diff::SyntaxTree SrcTree(Src->getASTContext());
diff::SyntaxTree DstTree(Dst->getASTContext());
- diff::ASTDiff Diff(SrcTree, DstTree, Options);
-
- for (diff::NodeId Dst : DstTree) {
- diff::NodeId Src = Diff.getMapped(DstTree, Dst);
- if (Src.isValid()) {
- llvm::outs() << "Match ";
- printNode(llvm::outs(), SrcTree, Src);
- llvm::outs() << " to ";
- printNode(llvm::outs(), DstTree, Dst);
- llvm::outs() << "\n";
- }
- printDstChange(llvm::outs(), Diff, SrcTree, DstTree, Dst);
- }
- for (diff::NodeId Src : SrcTree) {
- if (Diff.getMapped(SrcTree, Src).isInvalid()) {
- llvm::outs() << "Delete ";
- printNode(llvm::outs(), SrcTree, Src);
- llvm::outs() << "\n";
- }
- }
+ diff::ASTDiff DiffTool(SrcTree, DstTree, Options);
+ for (const auto &Match : DiffTool.getMatches())
+ DiffTool.printMatch(llvm::outs(), Match);
+ for (const auto &Change : DiffTool.getChanges())
+ DiffTool.printChange(llvm::outs(), Change);
return 0;
}