From: Richard Trieu Date: Wed, 22 Feb 2017 01:11:25 +0000 (+0000) Subject: Add more ODR checking. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d6496f0a0eeb80dc778e0141711397b1402e591b;p=clang Add more ODR checking. Add the basics for the ODRHash class, which will only process Decl's from a whitelist, which currently only has AccessSpecDecl. Different access specifiers in merged classes can now be detected. Differential Revision: https://reviews.llvm.org/D21675 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@295800 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/AST/ODRHash.h b/include/clang/AST/ODRHash.h new file mode 100644 index 0000000000..9af8488fca --- /dev/null +++ b/include/clang/AST/ODRHash.h @@ -0,0 +1,84 @@ +//===-- ODRHash.h - Hashing to diagnose ODR failures ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the ODRHash class, which calculates +/// a hash based on AST nodes, which is stable across different runs. +/// +//===----------------------------------------------------------------------===// + +#include "clang/AST/DeclarationName.h" +#include "clang/AST/Type.h" +#include "clang/AST/TemplateBase.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang { + +class Decl; +class IdentifierInfo; +class NestedNameSpecifer; +class Stmt; +class TemplateParameterList; + +// ODRHash is used to calculate a hash based on AST node contents that +// does not rely on pointer addresses. This allows the hash to not vary +// between runs and is usable to detect ODR problems in modules. To use, +// construct an ODRHash object, then call Add* methods over the nodes that +// need to be hashed. Then call CalculateHash to get the hash value. +// Typically, only one Add* call is needed. clear can be called to reuse the +// object. +class ODRHash { + // Use DenseMaps to convert between Decl and Type pointers and an index value. + llvm::DenseMap DeclMap; + llvm::DenseMap TypeMap; + + // Save space by processing bools at the end. + llvm::SmallVector Bools; + + llvm::FoldingSetNodeID ID; + +public: + ODRHash() {} + + // Use this for ODR checking classes between modules. This method compares + // more information than the AddDecl class. + void AddCXXRecordDecl(const CXXRecordDecl *Record); + + // Process SubDecls of the main Decl. This method calls the DeclVisitor + // while AddDecl does not. + void AddSubDecl(const Decl *D); + + // Reset the object for reuse. + void clear(); + + // Add booleans to ID and uses it to calculate the hash. + unsigned CalculateHash(); + + // Add AST nodes that need to be processed. + void AddDecl(const Decl *D); + void AddType(const Type *T); + void AddQualType(QualType T); + void AddStmt(const Stmt *S); + void AddIdentifierInfo(const IdentifierInfo *II); + void AddNestedNameSpecifier(const NestedNameSpecifier *NNS); + void AddTemplateName(TemplateName Name); + void AddDeclarationName(DeclarationName Name); + void AddTemplateArgument(TemplateArgument TA); + void AddTemplateParameterList(const TemplateParameterList *TPL); + + // Save booleans until the end to lower the size of data to process. + void AddBoolean(bool value); + + static bool isWhitelistedDecl(const Decl* D, const CXXRecordDecl *Record); +}; + +} // end namespace clang diff --git a/include/clang/Basic/DiagnosticSerializationKinds.td b/include/clang/Basic/DiagnosticSerializationKinds.td index 066a1f5fa6..30d48aef77 100644 --- a/include/clang/Basic/DiagnosticSerializationKinds.td +++ b/include/clang/Basic/DiagnosticSerializationKinds.td @@ -117,6 +117,15 @@ def note_module_odr_violation_different_definitions : Note< def err_module_odr_violation_different_instantiations : Error< "instantiation of %q0 is different in different modules">; +def err_module_odr_violation_mismatch_decl : Error< + "%q0 has different definitions in different modules; first difference is " + "%select{definition in module '%2'|defined here}1 found " + "%select{end of class|public access specifier|private access specifier|" + "protected access specifier}3">; +def note_module_odr_violation_mismatch_decl : Note<"but in '%0' found " + "%select{end of class|public access specifier|private access specifier|" + "protected access specifier}1">; + def warn_module_uses_date_time : Warning< "%select{precompiled header|module}0 uses __DATE__ or __TIME__">, InGroup>; diff --git a/lib/AST/CMakeLists.txt b/lib/AST/CMakeLists.txt index e28bd2e16d..2e98f524da 100644 --- a/lib/AST/CMakeLists.txt +++ b/lib/AST/CMakeLists.txt @@ -40,6 +40,7 @@ add_clang_library(clangAST MicrosoftMangle.cpp NestedNameSpecifier.cpp NSAPI.cpp + ODRHash.cpp OpenMPClause.cpp ParentMap.cpp RawCommentList.cpp diff --git a/lib/AST/DeclCXX.cpp b/lib/AST/DeclCXX.cpp index 19ffe1a720..3846ace1aa 100644 --- a/lib/AST/DeclCXX.cpp +++ b/lib/AST/DeclCXX.cpp @@ -18,6 +18,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/ODRHash.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/IdentifierTable.h" #include "llvm/ADT/STLExtras.h" @@ -371,7 +372,15 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, data().IsParsingBaseSpecifiers = false; } -void CXXRecordDecl::computeODRHash() {} +void CXXRecordDecl::computeODRHash() { + if (!DefinitionData) + return; + + ODRHash Hash; + Hash.AddCXXRecordDecl(this); + + DefinitionData->ODRHash = Hash.CalculateHash(); +} void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { // C++11 [class.copy]p11: diff --git a/lib/AST/ODRHash.cpp b/lib/AST/ODRHash.cpp new file mode 100644 index 0000000000..b588b31db9 --- /dev/null +++ b/lib/AST/ODRHash.cpp @@ -0,0 +1,152 @@ +//===-- ODRHash.cpp - Hashing to diagnose ODR failures ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements the ODRHash class, which calculates a hash based +/// on AST nodes, which is stable across different runs. +/// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ODRHash.h" + +#include "clang/AST/DeclVisitor.h" +#include "clang/AST/NestedNameSpecifier.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/TypeVisitor.h" + +using namespace clang; + +void ODRHash::AddStmt(const Stmt *S) {} +void ODRHash::AddIdentifierInfo(const IdentifierInfo *II) {} +void ODRHash::AddNestedNameSpecifier(const NestedNameSpecifier *NNS) {} +void ODRHash::AddTemplateName(TemplateName Name) {} +void ODRHash::AddDeclarationName(DeclarationName Name) {} +void ODRHash::AddTemplateArgument(TemplateArgument TA) {} +void ODRHash::AddTemplateParameterList(const TemplateParameterList *TPL) {} + +void ODRHash::clear() { + DeclMap.clear(); + TypeMap.clear(); + Bools.clear(); + ID.clear(); +} + +unsigned ODRHash::CalculateHash() { + // Append the bools to the end of the data segment backwards. This allows + // for the bools data to be compressed 32 times smaller compared to using + // ID.AddBoolean + const unsigned unsigned_bits = sizeof(unsigned) * CHAR_BIT; + const unsigned size = Bools.size(); + const unsigned remainder = size % unsigned_bits; + const unsigned loops = size / unsigned_bits; + auto I = Bools.rbegin(); + unsigned value = 0; + for (unsigned i = 0; i < remainder; ++i) { + value <<= 1; + value |= *I; + ++I; + } + ID.AddInteger(value); + + for (unsigned i = 0; i < loops; ++i) { + value = 0; + for (unsigned j = 0; j < unsigned_bits; ++j) { + value <<= 1; + value |= *I; + ++I; + } + ID.AddInteger(value); + } + + assert(I == Bools.rend()); + Bools.clear(); + return ID.ComputeHash(); +} + +// Process a Decl pointer. Add* methods call back into ODRHash while Visit* +// methods process the relevant parts of the Decl. +class ODRDeclVisitor : public ConstDeclVisitor { + typedef ConstDeclVisitor Inherited; + llvm::FoldingSetNodeID &ID; + ODRHash &Hash; + +public: + ODRDeclVisitor(llvm::FoldingSetNodeID &ID, ODRHash &Hash) + : ID(ID), Hash(Hash) {} + + void Visit(const Decl *D) { + ID.AddInteger(D->getKind()); + Inherited::Visit(D); + } + + void VisitAccessSpecDecl(const AccessSpecDecl *D) { + ID.AddInteger(D->getAccess()); + Inherited::VisitAccessSpecDecl(D); + } +}; + +// Only allow a small portion of Decl's to be processed. Remove this once +// all Decl's can be handled. +bool ODRHash::isWhitelistedDecl(const Decl *D, const CXXRecordDecl *Parent) { + if (D->isImplicit()) return false; + if (D->getDeclContext() != Parent) return false; + + switch (D->getKind()) { + default: + return false; + case Decl::AccessSpec: + return true; + } +} + +void ODRHash::AddSubDecl(const Decl *D) { + assert(D && "Expecting non-null pointer."); + AddDecl(D); + + ODRDeclVisitor(ID, *this).Visit(D); +} + +void ODRHash::AddCXXRecordDecl(const CXXRecordDecl *Record) { + assert(Record && Record->hasDefinition() && + "Expected non-null record to be a definition."); + AddDecl(Record); + + // Filter out sub-Decls which will not be processed in order to get an + // accurate count of Decl's. + llvm::SmallVector Decls; + for (const Decl *SubDecl : Record->decls()) { + if (isWhitelistedDecl(SubDecl, Record)) { + Decls.push_back(SubDecl); + } + } + + ID.AddInteger(Decls.size()); + for (auto SubDecl : Decls) { + AddSubDecl(SubDecl); + } +} + +void ODRHash::AddDecl(const Decl *D) { + assert(D && "Expecting non-null pointer."); + auto Result = DeclMap.insert(std::make_pair(D, DeclMap.size())); + ID.AddInteger(Result.first->second); + // On first encounter of a Decl pointer, process it. Every time afterwards, + // only the index value is needed. + if (!Result.second) { + return; + } + + ID.AddInteger(D->getKind()); +} + +void ODRHash::AddType(const Type *T) {} +void ODRHash::AddQualType(QualType T) {} +void ODRHash::AddBoolean(bool Value) { + Bools.push_back(Value); +} diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp index 985d68e348..c8ee5c6529 100644 --- a/lib/Serialization/ASTReader.cpp +++ b/lib/Serialization/ASTReader.cpp @@ -26,6 +26,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/NestedNameSpecifier.h" +#include "clang/AST/ODRHash.h" #include "clang/AST/RawCommentList.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLocVisitor.h" @@ -8918,24 +8919,143 @@ void ASTReader::diagnoseOdrViolations() { continue; bool Diagnosed = false; - for (auto *RD : Merge.second) { + CXXRecordDecl *FirstRecord = Merge.first; + std::string FirstModule = getOwningModuleNameForDiagnostic(FirstRecord); + for (CXXRecordDecl *SecondRecord : Merge.second) { // Multiple different declarations got merged together; tell the user // where they came from. - if (Merge.first != RD) { - // FIXME: Walk the definition, figure out what's different, - // and diagnose that. - if (!Diagnosed) { - std::string Module = getOwningModuleNameForDiagnostic(Merge.first); - Diag(Merge.first->getLocation(), - diag::err_module_odr_violation_different_definitions) - << Merge.first << Module.empty() << Module; - Diagnosed = true; + if (FirstRecord == SecondRecord) + continue; + + std::string SecondModule = getOwningModuleNameForDiagnostic(SecondRecord); + using DeclHashes = llvm::SmallVector, 4>; + DeclHashes FirstHashes; + DeclHashes SecondHashes; + ODRHash Hash; + + auto PopulateHashes = [&Hash, FirstRecord](DeclHashes &Hashes, + CXXRecordDecl *Record) { + for (auto *D : Record->decls()) { + // Due to decl merging, the first CXXRecordDecl is the parent of + // Decls in both records. + if (!ODRHash::isWhitelistedDecl(D, FirstRecord)) + continue; + Hash.clear(); + Hash.AddSubDecl(D); + Hashes.emplace_back(D, Hash.CalculateHash()); + } + }; + PopulateHashes(FirstHashes, FirstRecord); + PopulateHashes(SecondHashes, SecondRecord); + + // Used with err_module_odr_violation_mismatch_decl and + // note_module_odr_violation_mismatch_decl + enum { + EndOfClass, + PublicSpecifer, + PrivateSpecifer, + ProtectedSpecifer, + Other + } FirstDiffType = Other, + SecondDiffType = Other; + + auto DifferenceSelector = [](Decl *D) { + assert(D && "valid Decl required"); + switch (D->getKind()) { + default: + return Other; + case Decl::AccessSpec: + switch (D->getAccess()) { + case AS_public: + return PublicSpecifer; + case AS_private: + return PrivateSpecifer; + case AS_protected: + return ProtectedSpecifer; + case AS_none: + llvm_unreachable("Invalid access specifier"); + } + } + }; + + Decl *FirstDecl = nullptr; + Decl *SecondDecl = nullptr; + auto FirstIt = FirstHashes.begin(); + auto SecondIt = SecondHashes.begin(); + + // If there is a diagnoseable difference, FirstDiffType and + // SecondDiffType will not be Other and FirstDecl and SecondDecl will be + // filled in if not EndOfClass. + while (FirstIt != FirstHashes.end() || SecondIt != SecondHashes.end()) { + if (FirstIt->second == SecondIt->second) { + ++FirstIt; + ++SecondIt; + continue; } - Diag(RD->getLocation(), + FirstDecl = FirstIt == FirstHashes.end() ? nullptr : FirstIt->first; + SecondDecl = SecondIt == SecondHashes.end() ? nullptr : SecondIt->first; + + FirstDiffType = FirstDecl ? DifferenceSelector(FirstDecl) : EndOfClass; + SecondDiffType = + SecondDecl ? DifferenceSelector(SecondDecl) : EndOfClass; + + break; + } + + if (FirstDiffType == Other || SecondDiffType == Other) { + // Reaching this point means an unexpected Decl was encountered + // or no difference was detected. This causes a generic error + // message to be emitted. + Diag(FirstRecord->getLocation(), + diag::err_module_odr_violation_different_definitions) + << FirstRecord << FirstModule.empty() << FirstModule; + + Diag(SecondRecord->getLocation(), diag::note_module_odr_violation_different_definitions) - << getOwningModuleNameForDiagnostic(RD); + << SecondModule; + Diagnosed = true; + break; } + + if (FirstDiffType != SecondDiffType) { + SourceLocation FirstLoc; + SourceRange FirstRange; + if (FirstDiffType == EndOfClass) { + FirstLoc = FirstRecord->getBraceRange().getEnd(); + } else { + FirstLoc = FirstIt->first->getLocation(); + FirstRange = FirstIt->first->getSourceRange(); + } + Diag(FirstLoc, diag::err_module_odr_violation_mismatch_decl) + << FirstRecord << FirstModule.empty() << FirstModule << FirstRange + << FirstDiffType; + + SourceLocation SecondLoc; + SourceRange SecondRange; + if (SecondDiffType == EndOfClass) { + SecondLoc = SecondRecord->getBraceRange().getEnd(); + } else { + SecondLoc = SecondDecl->getLocation(); + SecondRange = SecondDecl->getSourceRange(); + } + Diag(SecondLoc, diag::note_module_odr_violation_mismatch_decl) + << SecondModule << SecondRange << SecondDiffType; + Diagnosed = true; + break; + } + + if (Diagnosed == true) + continue; + + Diag(FirstRecord->getLocation(), + diag::err_module_odr_violation_different_definitions) + << FirstRecord << FirstModule.empty() << FirstModule; + + Diag(SecondRecord->getLocation(), + diag::note_module_odr_violation_different_definitions) + << SecondModule; + Diagnosed = true; } if (!Diagnosed) { diff --git a/test/Modules/odr_hash.cpp b/test/Modules/odr_hash.cpp new file mode 100644 index 0000000000..51eb658fb3 --- /dev/null +++ b/test/Modules/odr_hash.cpp @@ -0,0 +1,412 @@ +// Clear and create directories +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: mkdir %t/cache +// RUN: mkdir %t/Inputs + +// Build first header file +// RUN: echo "#define FIRST" >> %t/Inputs/first.h +// RUN: cat %s >> %t/Inputs/first.h + +// Build second header file +// RUN: echo "#define SECOND" >> %t/Inputs/second.h +// RUN: cat %s >> %t/Inputs/second.h + +// Build module map file +// RUN: echo "module FirstModule {" >> %t/Inputs/module.map +// RUN: echo " header \"first.h\"" >> %t/Inputs/module.map +// RUN: echo "}" >> %t/Inputs/module.map +// RUN: echo "module SecondModule {" >> %t/Inputs/module.map +// RUN: echo " header \"second.h\"" >> %t/Inputs/module.map +// RUN: echo "}" >> %t/Inputs/module.map + +// Run test +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -x c++ -I%t/Inputs -verify %s -std=c++11 + +#if !defined(FIRST) && !defined(SECOND) +#include "first.h" +#include "second.h" +#endif + +namespace AccessSpecifiers { +#if defined(FIRST) +struct S1 { +}; +#elif defined(SECOND) +struct S1 { + private: +}; +#else +S1 s1; +// expected-error@second.h:* {{'AccessSpecifiers::S1' has different definitions in different modules; first difference is definition in module 'SecondModule' found private access specifier}} +// expected-note@first.h:* {{but in 'FirstModule' found end of class}} +#endif + +#if defined(FIRST) +struct S2 { + public: +}; +#elif defined(SECOND) +struct S2 { + protected: +}; +#else +S2 s2; +// expected-error@second.h:* {{'AccessSpecifiers::S2' has different definitions in different modules; first difference is definition in module 'SecondModule' found protected access specifier}} +// expected-note@first.h:* {{but in 'FirstModule' found public access specifier}} +#endif +} // namespace AccessSpecifiers + +// Naive parsing of AST can lead to cycles in processing. Ensure +// self-references don't trigger an endless cycles of AST node processing. +namespace SelfReference { +#if defined(FIRST) +template