From: Chandler Carruth Date: Thu, 26 Mar 2015 23:54:15 +0000 (+0000) Subject: [Modules] Make the AST serialization always use lexicographic order when X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=eb1f564a0e0739bd50be87115dcff149eb8de368;p=clang [Modules] Make the AST serialization always use lexicographic order when traversing the identifier table. No easy test case as this table is somewhere between hard and impossible to observe as non-deterministically ordered. The table is a hash table but we hash the string contents and never remove entries from the table so the growth pattern, etc, is all completely fixed. However, relying on the hash function being deterministic is specifically against the long-term direction of LLVM's hashing datastructures, which are intended to provide *no* ordering guarantees. As such, this defends against these things by sorting the identifiers. Sorting identifiers right before we emit them to a serialized form seems a low cost for predictability here. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@233332 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h index f540c58e5a..67371f51f3 100644 --- a/include/clang/Basic/IdentifierTable.h +++ b/include/clang/Basic/IdentifierTable.h @@ -308,7 +308,12 @@ public: else RecomputeNeedsHandleIdentifier(); } - + + /// \brief Provide less than operator for lexicographical sorting. + bool operator<(const IdentifierInfo &RHS) const { + return getName() < RHS.getName(); + } + private: /// The Preprocessor::HandleIdentifier does several special (but rare) /// things to identifiers of various sorts. For example, it changes the diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp index 3f2c46d80e..655c9fd28b 100644 --- a/lib/Serialization/ASTWriter.cpp +++ b/lib/Serialization/ASTWriter.cpp @@ -3504,10 +3504,16 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP, // table to enable checking of the predefines buffer in the case // where the user adds new macro definitions when building the AST // file. + SmallVector IIs; for (IdentifierTable::iterator ID = PP.getIdentifierTable().begin(), IDEnd = PP.getIdentifierTable().end(); ID != IDEnd; ++ID) - getIdentifierRef(ID->second); + IIs.push_back(ID->second); + // Sort the identifiers lexicographically before getting them references so + // that their order is stable. + std::sort(IIs.begin(), IIs.end(), llvm::less_ptr()); + for (const IdentifierInfo *II : IIs) + getIdentifierRef(II); // Create the on-disk hash table representation. We only store offsets // for identifiers that appear here for the first time. @@ -4504,15 +4510,17 @@ void ASTWriter::WriteASTCore(Sema &SemaRef, // Make sure all decls associated with an identifier are registered for // serialization. - llvm::SmallVector IIsToVisit; + llvm::SmallVector IIs; for (IdentifierTable::iterator ID = PP.getIdentifierTable().begin(), IDEnd = PP.getIdentifierTable().end(); ID != IDEnd; ++ID) { const IdentifierInfo *II = ID->second; if (!Chain || !II->isFromAST() || II->hasChangedSinceDeserialization()) - IIsToVisit.push_back(II); + IIs.push_back(II); } - for (const IdentifierInfo *II : IIsToVisit) { + // Sort the identifiers to visit based on their name. + std::sort(IIs.begin(), IIs.end(), llvm::less_ptr()); + for (const IdentifierInfo *II : IIs) { for (IdentifierResolver::iterator D = SemaRef.IdResolver.begin(II), DEnd = SemaRef.IdResolver.end(); D != DEnd; ++D) {