From 07cdaf54b0678548b578063229ba37eb79200ee1 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Wed, 12 Mar 2014 21:06:31 +0000 Subject: [PATCH] Revert "CodeGen: Use a binary format for instrumentation based profiling" I've clearly done something wrong with how to get this to link correctly. Reverting for now. This reverts commit r203711. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@203712 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CodeGenModule.cpp | 13 +- lib/CodeGen/CodeGenModule.h | 8 +- lib/CodeGen/CodeGenPGO.cpp | 184 ++++++++++++++---- lib/CodeGen/CodeGenPGO.h | 26 ++- test/Profile/Inputs/c-attributes.profdata | Bin 312 -> 161 bytes .../Inputs/c-counter-overflows.profdata | Bin 120 -> 93 bytes test/Profile/Inputs/c-general.profdata | Bin 1272 -> 436 bytes test/Profile/Inputs/c-outdated-data.profdata | Bin 128 -> 34 bytes test/Profile/Inputs/cxx-class.profdata | Bin 400 -> 163 bytes test/Profile/Inputs/cxx-throws.profdata | Bin 168 -> 53 bytes test/Profile/Inputs/objc-general.profdata | Bin 216 -> 99 bytes tools/driver/CMakeLists.txt | 1 - 12 files changed, 179 insertions(+), 53 deletions(-) diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 484c4edc24..4f040e2780 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -47,7 +47,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/Profile/ProfileDataReader.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" @@ -78,7 +77,7 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, ABI(createCXXABI(*this)), VMContext(M.getContext()), TBAA(0), TheTargetCodeGenInfo(0), Types(*this), VTables(*this), ObjCRuntime(0), OpenCLRuntime(0), CUDARuntime(0), DebugInfo(0), ARCData(0), - NoObjCARCExceptionsMetadata(0), RRData(0), PGOReader(nullptr), + NoObjCARCExceptionsMetadata(0), RRData(0), PGOData(0), CFConstantStringClassRef(0), ConstantStringClassRef(0), NSConstantStringType(0), NSConcreteGlobalBlock(0), NSConcreteStackBlock(0), BlockObjectAssign(0), @@ -134,14 +133,8 @@ CodeGenModule::CodeGenModule(ASTContext &C, const CodeGenOptions &CGO, ARCData = new ARCEntrypoints(); RRData = new RREntrypoints(); - if (!CodeGenOpts.InstrProfileInput.empty()) { - if (llvm::error_code EC = llvm::ProfileDataReader::create( - CodeGenOpts.InstrProfileInput, PGOReader)) { - unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, - "Could not read profile: %0"); - getDiags().Report(DiagID) << EC.message(); - } - } + if (!CodeGenOpts.InstrProfileInput.empty()) + PGOData = new PGOProfileData(*this, CodeGenOpts.InstrProfileInput); } CodeGenModule::~CodeGenModule() { diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 061ec48e37..4bd8b7a3de 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -42,7 +42,6 @@ namespace llvm { class DataLayout; class FunctionType; class LLVMContext; - class ProfileDataReader; } namespace clang { @@ -86,6 +85,7 @@ namespace CodeGen { class CGCUDARuntime; class BlockFieldFlags; class FunctionArgList; + class PGOProfileData; struct OrderGlobalInits { unsigned int priority; @@ -257,7 +257,7 @@ class CodeGenModule : public CodeGenTypeCache { ARCEntrypoints *ARCData; llvm::MDNode *NoObjCARCExceptionsMetadata; RREntrypoints *RRData; - std::unique_ptr PGOReader; + PGOProfileData *PGOData; // WeakRefReferences - A set of references that have only been seen via // a weakref so far. This is used to remove the weak of the reference if we @@ -480,8 +480,8 @@ public: return *RRData; } - llvm::ProfileDataReader *getPGOReader() const { - return PGOReader.get(); + PGOProfileData *getPGOData() const { + return PGOData; } llvm::Constant *getStaticLocalDeclAddress(const VarDecl *D) { diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp index abd47ccd2c..3206daa76a 100644 --- a/lib/CodeGen/CodeGenPGO.cpp +++ b/lib/CodeGen/CodeGenPGO.cpp @@ -17,12 +17,132 @@ #include "clang/AST/StmtVisitor.h" #include "llvm/Config/config.h" // for strtoull()/strtoll() define #include "llvm/IR/MDBuilder.h" -#include "llvm/Profile/ProfileDataReader.h" #include "llvm/Support/FileSystem.h" using namespace clang; using namespace CodeGen; +static void ReportBadPGOData(CodeGenModule &CGM, const char *Message) { + DiagnosticsEngine &Diags = CGM.getDiags(); + unsigned diagID = Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0"); + Diags.Report(diagID) << Message; +} + +PGOProfileData::PGOProfileData(CodeGenModule &CGM, std::string Path) + : CGM(CGM) { + if (llvm::MemoryBuffer::getFile(Path, DataBuffer)) { + ReportBadPGOData(CGM, "failed to open pgo data file"); + return; + } + + if (DataBuffer->getBufferSize() > std::numeric_limits::max()) { + ReportBadPGOData(CGM, "pgo data file too big"); + return; + } + + // Scan through the data file and map each function to the corresponding + // file offset where its counts are stored. + const char *BufferStart = DataBuffer->getBufferStart(); + const char *BufferEnd = DataBuffer->getBufferEnd(); + const char *CurPtr = BufferStart; + uint64_t MaxCount = 0; + while (CurPtr < BufferEnd) { + // Read the function name. + const char *FuncStart = CurPtr; + // For Objective-C methods, the name may include whitespace, so search + // backward from the end of the line to find the space that separates the + // name from the number of counters. (This is a temporary hack since we are + // going to completely replace this file format in the near future.) + CurPtr = strchr(CurPtr, '\n'); + if (!CurPtr) { + ReportBadPGOData(CGM, "pgo data file has malformed function entry"); + return; + } + while (*--CurPtr != ' ') + ; + StringRef FuncName(FuncStart, CurPtr - FuncStart); + + // Read the number of counters. + char *EndPtr; + unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10); + if (EndPtr == CurPtr || *EndPtr != '\n' || NumCounters <= 0) { + ReportBadPGOData(CGM, "pgo data file has unexpected number of counters"); + return; + } + CurPtr = EndPtr; + + // Read function count. + uint64_t Count = strtoll(CurPtr, &EndPtr, 10); + if (EndPtr == CurPtr || *EndPtr != '\n') { + ReportBadPGOData(CGM, "pgo-data file has bad count value"); + return; + } + CurPtr = EndPtr; // Point to '\n'. + FunctionCounts[FuncName] = Count; + MaxCount = Count > MaxCount ? Count : MaxCount; + + // There is one line for each counter; skip over those lines. + // Since function count is already read, we start the loop from 1. + for (unsigned N = 1; N < NumCounters; ++N) { + CurPtr = strchr(++CurPtr, '\n'); + if (!CurPtr) { + ReportBadPGOData(CGM, "pgo data file is missing some counter info"); + return; + } + } + + // Skip over the blank line separating functions. + CurPtr += 2; + + DataOffsets[FuncName] = FuncStart - BufferStart; + } + MaxFunctionCount = MaxCount; +} + +bool PGOProfileData::getFunctionCounts(StringRef FuncName, + std::vector &Counts) { + // Find the relevant section of the pgo-data file. + llvm::StringMap::const_iterator OffsetIter = + DataOffsets.find(FuncName); + if (OffsetIter == DataOffsets.end()) + return true; + const char *CurPtr = DataBuffer->getBufferStart() + OffsetIter->getValue(); + + // Skip over the function name. + CurPtr = strchr(CurPtr, '\n'); + assert(CurPtr && "pgo-data has corrupted function entry"); + while (*--CurPtr != ' ') + ; + + // Read the number of counters. + char *EndPtr; + unsigned NumCounters = strtol(++CurPtr, &EndPtr, 10); + assert(EndPtr != CurPtr && *EndPtr == '\n' && NumCounters > 0 && + "pgo-data file has corrupted number of counters"); + CurPtr = EndPtr; + + Counts.reserve(NumCounters); + + for (unsigned N = 0; N < NumCounters; ++N) { + // Read the count value. + uint64_t Count = strtoll(CurPtr, &EndPtr, 10); + if (EndPtr == CurPtr || *EndPtr != '\n') { + ReportBadPGOData(CGM, "pgo-data file has bad count value"); + return true; + } + Counts.push_back(Count); + CurPtr = EndPtr + 1; + } + + // Make sure the number of counters matches up. + if (Counts.size() != NumCounters) { + ReportBadPGOData(CGM, "pgo-data file has inconsistent counters"); + return true; + } + + return false; +} + void CodeGenPGO::setFuncName(llvm::Function *Fn) { StringRef Func = Fn->getName(); @@ -57,23 +177,22 @@ void CodeGenPGO::emitWriteoutFunction() { llvm::Type *Int32Ty = llvm::Type::getInt32Ty(Ctx); llvm::Type *Int8PtrTy = llvm::Type::getInt8PtrTy(Ctx); - llvm::Function *AddFuncsF = - CGM.getModule().getFunction("__llvm_pgo_add_functions"); - if (!AddFuncsF) { - llvm::FunctionType *AddFuncsFTy = + llvm::Function *WriteoutF = + CGM.getModule().getFunction("__llvm_pgo_writeout"); + if (!WriteoutF) { + llvm::FunctionType *WriteoutFTy = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), false); - AddFuncsF = llvm::Function::Create(AddFuncsFTy, + WriteoutF = llvm::Function::Create(WriteoutFTy, llvm::GlobalValue::InternalLinkage, - "__llvm_pgo_add_functions", - &CGM.getModule()); + "__llvm_pgo_writeout", &CGM.getModule()); } - AddFuncsF->setUnnamedAddr(true); - AddFuncsF->addFnAttr(llvm::Attribute::NoInline); + WriteoutF->setUnnamedAddr(true); + WriteoutF->addFnAttr(llvm::Attribute::NoInline); if (CGM.getCodeGenOpts().DisableRedZone) - AddFuncsF->addFnAttr(llvm::Attribute::NoRedZone); + WriteoutF->addFnAttr(llvm::Attribute::NoRedZone); - llvm::BasicBlock *BB = AddFuncsF->empty() ? - llvm::BasicBlock::Create(Ctx, "", AddFuncsF) : &AddFuncsF->getEntryBlock(); + llvm::BasicBlock *BB = WriteoutF->empty() ? + llvm::BasicBlock::Create(Ctx, "", WriteoutF) : &WriteoutF->getEntryBlock(); CGBuilderTy PGOBuilder(BB); @@ -83,35 +202,32 @@ void CodeGenPGO::emitWriteoutFunction() { PGOBuilder.SetInsertPoint(I); llvm::Type *Int64PtrTy = llvm::Type::getInt64PtrTy(Ctx); - llvm::Type *Int64Ty = llvm::Type::getInt64Ty(Ctx); llvm::Type *Args[] = { Int8PtrTy, // const char *FuncName - Int64Ty, // uint64_t FunctionHash Int32Ty, // uint32_t NumCounters Int64PtrTy // uint64_t *Counters }; llvm::FunctionType *FTy = llvm::FunctionType::get(PGOBuilder.getVoidTy(), Args, false); llvm::Constant *EmitFunc = - CGM.getModule().getOrInsertFunction("llvm_pgo_add_function", FTy); + CGM.getModule().getOrInsertFunction("llvm_pgo_emit", FTy); llvm::Constant *NameString = CGM.GetAddrOfConstantCString(getFuncName(), "__llvm_pgo_name"); NameString = llvm::ConstantExpr::getBitCast(NameString, Int8PtrTy); - PGOBuilder.CreateCall4(EmitFunc, NameString, - // TODO: This should be a hash, not just the count! - PGOBuilder.getInt64(NumRegionCounters), + PGOBuilder.CreateCall3(EmitFunc, NameString, PGOBuilder.getInt32(NumRegionCounters), PGOBuilder.CreateBitCast(RegionCounters, Int64PtrTy)); } llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) { - llvm::Function *AddFuncsF = - CGM.getModule().getFunction("__llvm_pgo_add_functions"); - if (!AddFuncsF) + llvm::Function *WriteoutF = + CGM.getModule().getFunction("__llvm_pgo_writeout"); + if (!WriteoutF) return NULL; - // Create a small bit of code that initializes PGO at startup + // Create a small bit of code that registers the "__llvm_pgo_writeout" to + // be executed at exit. llvm::Function *F = CGM.getModule().getFunction("__llvm_pgo_init"); if (F) return NULL; @@ -139,7 +255,7 @@ llvm::Function *CodeGenPGO::emitInitialization(CodeGenModule &CGM) { // Inialize the environment and register the local writeout function. llvm::Constant *PGOInit = CGM.getModule().getOrInsertFunction("llvm_pgo_init", FTy); - PGOBuilder.CreateCall(PGOInit, AddFuncsF); + PGOBuilder.CreateCall(PGOInit, WriteoutF); PGOBuilder.CreateRetVoid(); return F; @@ -642,8 +758,8 @@ namespace { void CodeGenPGO::assignRegionCounters(const Decl *D, llvm::Function *Fn) { bool InstrumentRegions = CGM.getCodeGenOpts().ProfileInstrGenerate; - llvm::ProfileDataReader *PGOReader = CGM.getPGOReader(); - if (!InstrumentRegions && !PGOReader) + PGOProfileData *PGOData = CGM.getPGOData(); + if (!InstrumentRegions && !PGOData) return; if (!D) return; @@ -651,10 +767,10 @@ void CodeGenPGO::assignRegionCounters(const Decl *D, llvm::Function *Fn) { mapRegionCounters(D); if (InstrumentRegions) emitCounterVariables(); - if (PGOReader) { - loadRegionCounts(PGOReader); + if (PGOData) { + loadRegionCounts(PGOData); computeRegionCounts(D); - applyFunctionAttributes(PGOReader, Fn); + applyFunctionAttributes(PGOData, Fn); } } @@ -681,12 +797,12 @@ void CodeGenPGO::computeRegionCounts(const Decl *D) { Walker.VisitBlockDecl(BD); } -void CodeGenPGO::applyFunctionAttributes(llvm::ProfileDataReader *PGOReader, +void CodeGenPGO::applyFunctionAttributes(PGOProfileData *PGOData, llvm::Function *Fn) { if (!haveRegionCounts()) return; - uint64_t MaxFunctionCount = PGOReader->getMaximumFunctionCount(); + uint64_t MaxFunctionCount = PGOData->getMaximumFunctionCount(); uint64_t FunctionCount = getRegionCount(0); if (FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount)) // Turn on InlineHint attribute for hot functions. @@ -719,15 +835,13 @@ void CodeGenPGO::emitCounterIncrement(CGBuilderTy &Builder, unsigned Counter) { Builder.CreateStore(Count, Addr); } -void CodeGenPGO::loadRegionCounts(llvm::ProfileDataReader *PGOReader) { +void CodeGenPGO::loadRegionCounts(PGOProfileData *PGOData) { // For now, ignore the counts from the PGO data file only if the number of // counters does not match. This could be tightened down in the future to // ignore counts when the input changes in various ways, e.g., by comparing a // hash value based on some characteristics of the input. RegionCounts = new std::vector(); - uint64_t Hash; - // TODO: Check for hash mismatch - if (PGOReader->getFunctionCounts(getFuncName(), Hash, *RegionCounts) || + if (PGOData->getFunctionCounts(getFuncName(), *RegionCounts) || RegionCounts->size() != NumRegionCounters) { delete RegionCounts; RegionCounts = 0; diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h index 7c19414a0b..51d59cf9a9 100644 --- a/lib/CodeGen/CodeGenPGO.h +++ b/lib/CodeGen/CodeGenPGO.h @@ -26,6 +26,27 @@ namespace clang { namespace CodeGen { class RegionCounter; +/// The raw counter data from an instrumented PGO binary +class PGOProfileData { +private: + /// The PGO data + std::unique_ptr DataBuffer; + /// Offsets into DataBuffer for each function's counters + llvm::StringMap DataOffsets; + /// Execution counts for each function. + llvm::StringMap FunctionCounts; + /// The maximal execution count among all functions. + uint64_t MaxFunctionCount; + CodeGenModule &CGM; +public: + PGOProfileData(CodeGenModule &CGM, std::string Path); + /// Fill Counts with the profile data for the given function name. Returns + /// false on success. + bool getFunctionCounts(StringRef FuncName, std::vector &Counts); + /// Return the maximum of all known function counts. + uint64_t getMaximumFunctionCount() { return MaxFunctionCount; } +}; + /// Per-function PGO state. This class should generally not be used directly, /// but instead through the CodeGenFunction and RegionCounter types. class CodeGenPGO { @@ -115,9 +136,8 @@ private: void setFuncName(llvm::Function *Fn); void mapRegionCounters(const Decl *D); void computeRegionCounts(const Decl *D); - void applyFunctionAttributes(llvm::ProfileDataReader *PGOReader, - llvm::Function *Fn); - void loadRegionCounts(llvm::ProfileDataReader *PGOReader); + void applyFunctionAttributes(PGOProfileData *PGOData, llvm::Function *Fn); + void loadRegionCounts(PGOProfileData *PGOData); void emitCounterVariables(); /// Emit code to increment the counter at the given index diff --git a/test/Profile/Inputs/c-attributes.profdata b/test/Profile/Inputs/c-attributes.profdata index 8cb0af84f0ac8ece085aedb6788c9b8b7831649e..38decbd5568a7d573261c65cf2192e11df618078 100644 GIT binary patch literal 161 zcmYj~Q3`+{5Jdmy6dgguvQ8smg@I{^-M>)@nvZ37<}vG2u~GPJCKvUa|v_%L&BE`8g?Ifdn9j1&DJKGxHjNG!QUBC^!wXcY_|Q7RUwY@(yz# z8XoLirw?MG%PaUnQ|fZ`HF literal 120 zcmeYX2y$a&U|>)IVh{kaL4XB_a}zW3z#JgK0cF5wn7R-D|NrL)%6$aVd_Wq;{|)4W N%s{7s!v8@=0RW8y9{B(O diff --git a/test/Profile/Inputs/c-general.profdata b/test/Profile/Inputs/c-general.profdata index 6a248f48dea0717b558513b5bbaaecf276e2948e..083b8fe55b4d7b61be68a2c57cc7bded95dbf5e7 100644 GIT binary patch literal 436 zcmY*VTW-W44E*;gxIilcNp{oAAj+}{6@e%aZF~EUN%~RwfrZB&Yf4zR=uOOdE25{4 zIw@P+1|pZ!Bh-+SjYV|mway?3t~Gl%(loW->mhIV_+$Kcs70Vd0P~<4{*om6I##eL z#R~!8I5Tjh5m?`TPD(B%O+@7ts#`p6;myqky0v-LCN=di*)kJ?>od&eoD&^GH5izf z31O#m1w4=N2?}F5M{kqKo8N8Cdl7eTLLA-GI1+ZDfC7-Bf%A5W$fvPHEvxZgO^}+M zP5F@&@3!?5kt?!Z$D`V7&ox>8&9>RF@SoCrL-dOsB%SxXXw05E&Xq-4C9W=-v~`W2 zNG@&ILkay>uZQy;-`c4fPpTF$V2KIu6yFq7x^pEv=$V2{4u+3H2;2-^?sXGW8 zNIm3StmpZhliz6;d|EVh_-N{4>RkK{G~Z!ABw<{S-U;Vc1M%t2{UG;)%mbtj4Ry#h z;{H0#8j$?S%ryq$2m7}VK63`~A@79vkoQ6TKh%Jn%RAeg2Hpm8fBatXnQwj4C literal 128 zcmeYX2y$a&U|_HUVh{kaL4Xg4^YY_MixZP_QsYw+OA)CepJ(q(ChEMbvuR4EGr literal 168 zcmeYX2y$a&U|=u