From: Mehdi Amini Date: Wed, 28 Dec 2016 22:30:28 +0000 (+0000) Subject: Add an index for Module Metadata record in the bitcode X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=89bf9692cc954b29de3360d129d80988b64cf559;p=llvm Add an index for Module Metadata record in the bitcode This index record the position for each metadata record in the bitcode, so that the reader will be able to lazy-load on demand each individual record. We also make sure that every abbrev is emitted upfront so that the block can be skipped while reading. I don't plan to commit this before having the reader counterpart, but I figured this can be reviewed mostly independently. Recommit r290684 (was reverted in r290686 because a test was broken) after adding a threshold to avoid emitting the index when unnecessary (little amount of metadata). This optimization "hides" a limitation of the ability to backpatch in the bitstream: we can only backpatch safely when the position has been flushed. So if we emit an index for one metadata, it is possible that (part of) the offset placeholder hasn't been flushed and the backpatch will fail. Differential Revision: https://reviews.llvm.org/D28083 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290690 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index b79e88d2c05..06113869ad6 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -112,6 +112,11 @@ public: &Out[ByteNo], NewWord, BitNo & 7); } + void BackpatchWord64(uint64_t BitNo, uint64_t Val) { + BackpatchWord(BitNo, (uint32_t)Val); + BackpatchWord(BitNo + 32, (uint32_t)(Val >> 32)); + } + void Emit(uint32_t Val, unsigned NumBits) { assert(NumBits && NumBits <= 32 && "Invalid value size!"); assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); @@ -279,7 +284,7 @@ private: default: llvm_unreachable("Unknown encoding!"); case BitCodeAbbrevOp::Fixed: if (Op.getEncodingData()) - Emit((unsigned)V, (unsigned)Op.getEncodingData()); + Emit64(V, (unsigned)Op.getEncodingData()); break; case BitCodeAbbrevOp::VBR: if (Op.getEncodingData()) diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index f42e75427b3..c996c38261c 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -218,30 +218,30 @@ enum GlobalValueSummarySymtabCodes { }; enum MetadataCodes { - METADATA_STRING_OLD = 1, // MDSTRING: [values] - METADATA_VALUE = 2, // VALUE: [type num, value num] - METADATA_NODE = 3, // NODE: [n x md num] - METADATA_NAME = 4, // STRING: [values] - METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num] - METADATA_KIND = 6, // [n x [id, name]] - METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?] - METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)] - METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)] - METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes] - METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]] - METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num] - METADATA_SUBRANGE = 13, // [distinct, count, lo] - METADATA_ENUMERATOR = 14, // [distinct, value, name] - METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc] - METADATA_FILE = 16, // [distinct, filename, directory, checksumkind, checksum] - METADATA_DERIVED_TYPE = 17, // [distinct, ...] - METADATA_COMPOSITE_TYPE = 18, // [distinct, ...] - METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc] - METADATA_COMPILE_UNIT = 20, // [distinct, ...] - METADATA_SUBPROGRAM = 21, // [distinct, ...] - METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column] + METADATA_STRING_OLD = 1, // MDSTRING: [values] + METADATA_VALUE = 2, // VALUE: [type num, value num] + METADATA_NODE = 3, // NODE: [n x md num] + METADATA_NAME = 4, // STRING: [values] + METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num] + METADATA_KIND = 6, // [n x [id, name]] + METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?] + METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)] + METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)] + METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes] + METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]] + METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num] + METADATA_SUBRANGE = 13, // [distinct, count, lo] + METADATA_ENUMERATOR = 14, // [distinct, value, name] + METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc] + METADATA_FILE = 16, // [distinct, filename, directory, checksumkind, checksum] + METADATA_DERIVED_TYPE = 17, // [distinct, ...] + METADATA_COMPOSITE_TYPE = 18, // [distinct, ...] + METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc] + METADATA_COMPILE_UNIT = 20, // [distinct, ...] + METADATA_SUBPROGRAM = 21, // [distinct, ...] + METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column] METADATA_LEXICAL_BLOCK_FILE = 23, //[distinct, scope, file, discriminator] - METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols] + METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols] METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...] METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...] METADATA_GLOBAL_VAR = 27, // [distinct, ...] @@ -254,7 +254,9 @@ enum MetadataCodes { METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars]) METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]] - METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr] + METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr] + METADATA_INDEX_OFFSET = 38, // [offset] + METADATA_INDEX = 39, // [bitpos] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 9d2bea8ccae..b7f753e1d62 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -38,6 +38,11 @@ using namespace llvm; namespace { + +cl::opt + IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25), + cl::desc("Number of metadatas above which we emit an index " + "to enable lazy-loading")); /// These are manifest constants used by the bitcode writer. They do not need to /// be kept in sync with the reader, but need to be consistent within this file. enum { @@ -224,7 +229,9 @@ private: void writeMetadataStrings(ArrayRef Strings, SmallVectorImpl &Record); void writeMetadataRecords(ArrayRef MDs, - SmallVectorImpl &Record); + SmallVectorImpl &Record, + std::vector *MDAbbrevs = nullptr, + std::vector *IndexPos = nullptr); void writeModuleMetadata(); void writeFunctionMetadata(const Function &F); void writeFunctionMetadataAttachment(const Function &F); @@ -1854,8 +1861,16 @@ void ModuleBitcodeWriter::writeMetadataStrings( Record.clear(); } +// Generates an enum to use as an index in the Abbrev array of Metadata record. +enum MetadataAbbrev : unsigned { +#define HANDLE_MDNODE_LEAF(CLASS) CLASS##AbbrevID, +#include "llvm/IR/Metadata.def" + LastPlusOne +}; + void ModuleBitcodeWriter::writeMetadataRecords( - ArrayRef MDs, SmallVectorImpl &Record) { + ArrayRef MDs, SmallVectorImpl &Record, + std::vector *MDAbbrevs, std::vector *IndexPos) { if (MDs.empty()) return; @@ -1864,6 +1879,8 @@ void ModuleBitcodeWriter::writeMetadataRecords( #include "llvm/IR/Metadata.def" for (const Metadata *MD : MDs) { + if (IndexPos) + IndexPos->push_back(Stream.GetCurrentBitNo()); if (const MDNode *N = dyn_cast(MD)) { assert(N->isResolved() && "Expected forward references to be resolved"); @@ -1872,7 +1889,11 @@ void ModuleBitcodeWriter::writeMetadataRecords( llvm_unreachable("Invalid MDNode subclass"); #define HANDLE_MDNODE_LEAF(CLASS) \ case Metadata::CLASS##Kind: \ - write##CLASS(cast(N), Record, CLASS##Abbrev); \ + if (MDAbbrevs) \ + write##CLASS(cast(N), Record, \ + (*MDAbbrevs)[MetadataAbbrev::CLASS##AbbrevID]); \ + else \ + write##CLASS(cast(N), Record, CLASS##Abbrev); \ continue; #include "llvm/IR/Metadata.def" } @@ -1885,10 +1906,76 @@ void ModuleBitcodeWriter::writeModuleMetadata() { if (!VE.hasMDs() && M.named_metadata_empty()) return; - Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 4); SmallVector Record; + + // Emit all abbrevs upfront, so that the reader can jump in the middle of the + // block and load any metadata. + std::vector MDAbbrevs; + + MDAbbrevs.resize(MetadataAbbrev::LastPlusOne); + MDAbbrevs[MetadataAbbrev::DILocationAbbrevID] = createDILocationAbbrev(); + MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] = + createGenericDINodeAbbrev(); + + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 64)); + unsigned OffsetAbbrev = Stream.EmitAbbrev(Abbv); + + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + unsigned IndexAbbrev = Stream.EmitAbbrev(Abbv); + + // Emit MDStrings together upfront. writeMetadataStrings(VE.getMDStrings(), Record); - writeMetadataRecords(VE.getNonMDStrings(), Record); + + // We only emit an index for the metadata record if we have more than a given + // (naive) threshold of metadatas, otherwise it is not worth it. + if (VE.getNonMDStrings().size() > IndexThreshold) { + // Write a placeholder value in for the offset of the metadata index, + // which is written after the records, so that it can include + // the offset of each entry. The placeholder offset will be + // updated after all records are emitted. + uint64_t Vals[] = {0}; + Stream.EmitRecord(bitc::METADATA_INDEX_OFFSET, Vals, OffsetAbbrev); + } + + // Compute and save the bit offset to the current position, which will be + // patched when we emit the index later. We can simply subtract the 64-bit + // fixed size from the current bit number to get the location to backpatch. + uint64_t IndexOffsetRecordBitPos = Stream.GetCurrentBitNo(); + + // This index will contain the bitpos for each individual record. + std::vector IndexPos; + IndexPos.reserve(VE.getNonMDStrings().size()); + + // Write all the records + writeMetadataRecords(VE.getNonMDStrings(), Record, &MDAbbrevs, &IndexPos); + + if (VE.getNonMDStrings().size() > IndexThreshold) { + // Now that we have emitted all the records we will emit the index. But + // first + // backpatch the forward reference so that the reader can skip the records + // efficiently. + Stream.BackpatchWord64(IndexOffsetRecordBitPos - 64, + Stream.GetCurrentBitNo() - IndexOffsetRecordBitPos); + + // Delta encode the index. + uint64_t PreviousValue = IndexOffsetRecordBitPos; + for (auto &Elt : IndexPos) { + auto EltDelta = Elt - PreviousValue; + PreviousValue = Elt; + Elt = EltDelta; + } + // Emit the index record. + Stream.EmitRecord(bitc::METADATA_INDEX, IndexPos, IndexAbbrev); + IndexPos.clear(); + } + + // Write the named metadata now. writeNamedMetadata(Record); auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) { diff --git a/test/Bitcode/mdnodes-distinct-in-post-order.ll b/test/Bitcode/mdnodes-distinct-in-post-order.ll index 6e6ba604235..cefc1f5163f 100644 --- a/test/Bitcode/mdnodes-distinct-in-post-order.ll +++ b/test/Bitcode/mdnodes-distinct-in-post-order.ll @@ -1,4 +1,5 @@ -; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s +; RUN: llvm-as <%s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=CHECK -check-prefix=MDINDEX +; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=CHECK ; Check that distinct nodes are emitted in post-order to avoid unnecessary ; forward references. @@ -17,6 +18,11 @@ ; CHECK-NEXT: !4 = distinct !{!1, !3, !2} +; Before the named records we emit the index containing the position of the +; previously emitted records, but only if we have a number of record above +; a threshold (can be controlled through `-bitcode-mdindex-threshold`). +; MDINDEX: !3 = !{!2} +; Before the named records we emit the index containing the position of the +; previously emitted records +; CHECK-NEXT: !2 = !{!1} +; Before the named records we emit the index containing the position of the +; previously emitted records +; CHECK-NEXT: ; CHECK-NEXT: @@ -27,6 +30,10 @@ ; CHECK-NEXT: !6 = !{!3, !5, !4} +; Before the named records we emit the index containing the position of the +; previously emitted records +; CHECK-NEXT: !4 = !{!"named"} diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index e494343bf87..3220063275b 100644 --- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -353,6 +353,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID, STRINGIFY_CODE(METADATA, OBJC_PROPERTY) STRINGIFY_CODE(METADATA, IMPORTED_ENTITY) STRINGIFY_CODE(METADATA, MODULE) + STRINGIFY_CODE(METADATA, INDEX_OFFSET) + STRINGIFY_CODE(METADATA, INDEX) } case bitc::METADATA_KIND_BLOCK_ID: switch (CodeID) { @@ -514,6 +516,9 @@ static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo, SmallVector Record; + // Keep the offset to the metadata index if seen. + uint64_t MetadataIndexOffset = 0; + // Read all the records for this block. while (1) { if (Stream.AtEndOfStream()) @@ -600,6 +605,22 @@ static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo, for (unsigned i = 0, e = Record.size(); i != e; ++i) outs() << " op" << i << "=" << (int64_t)Record[i]; + // If we found a metadata index, let's verify that we had an offset before + // and validate its forward reference offset was correct! + if (BlockID == bitc::METADATA_BLOCK_ID) { + if (Code == bitc::METADATA_INDEX_OFFSET) { + MetadataIndexOffset = Stream.GetCurrentBitNo() + Record[0]; + } + if (Code == bitc::METADATA_INDEX) { + outs() << " (offset "; + if (MetadataIndexOffset == RecordStartBit) + outs() << "match)"; + else + outs() << "mismatch: " << MetadataIndexOffset << " vs " + << RecordStartBit << ")"; + } + } + // If we found a module hash, let's verify that it matches! if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) { if (Record.size() != 5)