&Out[ByteNo], NewWord, BitNo & 7);
}
+ void BackpatchWord64(uint64_t BitNo, uint64_t Val) {
+ BackpatchWord(BitNo, (uint32_t)Val);
+ BackpatchWord(BitNo + 32, (uint32_t)(Val >> 32));
+ }
+
void Emit(uint32_t Val, unsigned NumBits) {
assert(NumBits && NumBits <= 32 && "Invalid value size!");
assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
default: llvm_unreachable("Unknown encoding!");
case BitCodeAbbrevOp::Fixed:
if (Op.getEncodingData())
- Emit((unsigned)V, (unsigned)Op.getEncodingData());
+ Emit64(V, (unsigned)Op.getEncodingData());
break;
case BitCodeAbbrevOp::VBR:
if (Op.getEncodingData())
};
enum MetadataCodes {
- METADATA_STRING_OLD = 1, // MDSTRING: [values]
- METADATA_VALUE = 2, // VALUE: [type num, value num]
- METADATA_NODE = 3, // NODE: [n x md num]
- METADATA_NAME = 4, // STRING: [values]
- METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num]
- METADATA_KIND = 6, // [n x [id, name]]
- METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?]
- METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)]
- METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)]
- METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes]
- METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]]
- METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num]
- METADATA_SUBRANGE = 13, // [distinct, count, lo]
- METADATA_ENUMERATOR = 14, // [distinct, value, name]
- METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc]
- METADATA_FILE = 16, // [distinct, filename, directory, checksumkind, checksum]
- METADATA_DERIVED_TYPE = 17, // [distinct, ...]
- METADATA_COMPOSITE_TYPE = 18, // [distinct, ...]
- METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc]
- METADATA_COMPILE_UNIT = 20, // [distinct, ...]
- METADATA_SUBPROGRAM = 21, // [distinct, ...]
- METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column]
+ METADATA_STRING_OLD = 1, // MDSTRING: [values]
+ METADATA_VALUE = 2, // VALUE: [type num, value num]
+ METADATA_NODE = 3, // NODE: [n x md num]
+ METADATA_NAME = 4, // STRING: [values]
+ METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num]
+ METADATA_KIND = 6, // [n x [id, name]]
+ METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?]
+ METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)]
+ METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)]
+ METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes]
+ METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]]
+ METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num]
+ METADATA_SUBRANGE = 13, // [distinct, count, lo]
+ METADATA_ENUMERATOR = 14, // [distinct, value, name]
+ METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc]
+ METADATA_FILE = 16, // [distinct, filename, directory, checksumkind, checksum]
+ METADATA_DERIVED_TYPE = 17, // [distinct, ...]
+ METADATA_COMPOSITE_TYPE = 18, // [distinct, ...]
+ METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc]
+ METADATA_COMPILE_UNIT = 20, // [distinct, ...]
+ METADATA_SUBPROGRAM = 21, // [distinct, ...]
+ METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column]
METADATA_LEXICAL_BLOCK_FILE = 23, //[distinct, scope, file, discriminator]
- METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols]
+ METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols]
METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...]
METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...]
METADATA_GLOBAL_VAR = 27, // [distinct, ...]
METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...]
METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars])
METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]]
- METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr]
+ METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr]
+ METADATA_INDEX_OFFSET = 38, // [offset]
+ METADATA_INDEX = 39, // [bitpos]
};
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
using namespace llvm;
namespace {
+
+cl::opt<unsigned>
+ IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25),
+ cl::desc("Number of metadatas above which we emit an index "
+ "to enable lazy-loading"));
/// These are manifest constants used by the bitcode writer. They do not need to
/// be kept in sync with the reader, but need to be consistent within this file.
enum {
void writeMetadataStrings(ArrayRef<const Metadata *> Strings,
SmallVectorImpl<uint64_t> &Record);
void writeMetadataRecords(ArrayRef<const Metadata *> MDs,
- SmallVectorImpl<uint64_t> &Record);
+ SmallVectorImpl<uint64_t> &Record,
+ std::vector<unsigned> *MDAbbrevs = nullptr,
+ std::vector<uint64_t> *IndexPos = nullptr);
void writeModuleMetadata();
void writeFunctionMetadata(const Function &F);
void writeFunctionMetadataAttachment(const Function &F);
Record.clear();
}
+// Generates an enum to use as an index into the Abbrev array of Metadata
+// records.
+enum MetadataAbbrev : unsigned {
+#define HANDLE_MDNODE_LEAF(CLASS) CLASS##AbbrevID,
+#include "llvm/IR/Metadata.def"
+ LastPlusOne
+};
+
void ModuleBitcodeWriter::writeMetadataRecords(
- ArrayRef<const Metadata *> MDs, SmallVectorImpl<uint64_t> &Record) {
+ ArrayRef<const Metadata *> MDs, SmallVectorImpl<uint64_t> &Record,
+ std::vector<unsigned> *MDAbbrevs, std::vector<uint64_t> *IndexPos) {
if (MDs.empty())
return;
#include "llvm/IR/Metadata.def"
for (const Metadata *MD : MDs) {
+ if (IndexPos)
+ IndexPos->push_back(Stream.GetCurrentBitNo());
if (const MDNode *N = dyn_cast<MDNode>(MD)) {
assert(N->isResolved() && "Expected forward references to be resolved");
llvm_unreachable("Invalid MDNode subclass");
#define HANDLE_MDNODE_LEAF(CLASS) \
case Metadata::CLASS##Kind: \
- write##CLASS(cast<CLASS>(N), Record, CLASS##Abbrev); \
+ if (MDAbbrevs) \
+ write##CLASS(cast<CLASS>(N), Record, \
+ (*MDAbbrevs)[MetadataAbbrev::CLASS##AbbrevID]); \
+ else \
+ write##CLASS(cast<CLASS>(N), Record, CLASS##Abbrev); \
continue;
#include "llvm/IR/Metadata.def"
}
if (!VE.hasMDs() && M.named_metadata_empty())
return;
- Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 4);
SmallVector<uint64_t, 64> Record;
+
+ // Emit all abbrevs upfront, so that the reader can jump into the middle of
+ // the block and load any metadata.
+ std::vector<unsigned> MDAbbrevs;
+
+ MDAbbrevs.resize(MetadataAbbrev::LastPlusOne);
+ MDAbbrevs[MetadataAbbrev::DILocationAbbrevID] = createDILocationAbbrev();
+ MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] =
+ createGenericDINodeAbbrev();
+
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 64));
+ unsigned OffsetAbbrev = Stream.EmitAbbrev(Abbv);
+
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ unsigned IndexAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Emit MDStrings together upfront.
writeMetadataStrings(VE.getMDStrings(), Record);
- writeMetadataRecords(VE.getNonMDStrings(), Record);
+
+ // We only emit an index for the metadata record if we have more than a given
+ // (naive) threshold of metadatas, otherwise it is not worth it.
+ if (VE.getNonMDStrings().size() > IndexThreshold) {
+ // Write a placeholder value in for the offset of the metadata index,
+ // which is written after the records, so that it can include
+ // the offset of each entry. The placeholder offset will be
+ // updated after all records are emitted.
+ uint64_t Vals[] = {0};
+ Stream.EmitRecord(bitc::METADATA_INDEX_OFFSET, Vals, OffsetAbbrev);
+ }
+
+ // Save the current bit position, which is where the metadata records start.
+ // When the placeholder offset record was emitted above, its 64-bit fixed-size
+ // field ends here, so the location to backpatch later is this position minus
+ // 64.
+ uint64_t IndexOffsetRecordBitPos = Stream.GetCurrentBitNo();
+
+ // This index will contain the bitpos for each individual record.
+ std::vector<uint64_t> IndexPos;
+ IndexPos.reserve(VE.getNonMDStrings().size());
+
+ // Write all the records
+ writeMetadataRecords(VE.getNonMDStrings(), Record, &MDAbbrevs, &IndexPos);
+
+ if (VE.getNonMDStrings().size() > IndexThreshold) {
+ // Now that we have emitted all the records we will emit the index. But
+ // first backpatch the forward reference so that the reader can skip the
+ // records efficiently.
+ Stream.BackpatchWord64(IndexOffsetRecordBitPos - 64,
+ Stream.GetCurrentBitNo() - IndexOffsetRecordBitPos);
+
+ // Delta encode the index.
+ uint64_t PreviousValue = IndexOffsetRecordBitPos;
+ for (auto &Elt : IndexPos) {
+ auto EltDelta = Elt - PreviousValue;
+ PreviousValue = Elt;
+ Elt = EltDelta;
+ }
+ // Emit the index record.
+ Stream.EmitRecord(bitc::METADATA_INDEX, IndexPos, IndexAbbrev);
+ IndexPos.clear();
+ }
+
+ // Write the named metadata now.
writeNamedMetadata(Record);
auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) {
-; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s
+; RUN: llvm-as <%s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=CHECK -check-prefix=MDINDEX
+; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=CHECK
; Check that distinct nodes are emitted in post-order to avoid unnecessary
; forward references.
; CHECK-NEXT: <DISTINCT_NODE op0=1 op1=3 op2=2/>
!4 = distinct !{!1, !3, !2}
+; Before the named records we emit the index containing the position of the
+; previously emitted records, but only if the number of records is above
+; a threshold (can be controlled through `-bitcode-mdindex-threshold`).
+; MDINDEX: <INDEX {{.*}} (offset match)
+
; Note: named metadata nodes are not cannot reference null so their operands
; are numbered off-by-one.
; CHECK-NEXT: <NAME
-; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s
+; RUN: llvm-as <%s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s
; Check that distinct nodes break uniquing cycles, so that uniqued subgraphs
; are always in post-order.
;
; CHECK-NEXT: <NODE op0=2/>
!3 = !{!2}
+; Before the named records we emit the index containing the position of the
+; previously emitted records
+; CHECK-NEXT: <INDEX {{.*}} (offset match)
+
; Note: named metadata nodes are not cannot reference null so their operands
; are numbered off-by-one.
; CHECK-NEXT: <NAME
-; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s
+; RUN: llvm-as <%s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s
; Check that distinct nodes are emitted before uniqued nodes, even if that
; breaks post-order traversals.
; CHECK-NEXT: <NODE op0=1/>
!2 = !{!1}
+; Before the named records we emit the index containing the position of the
+; previously emitted records
+; CHECK-NEXT: <INDEX {{.*}} (offset match)
+
; Note: named metadata nodes are not cannot reference null so their operands
; are numbered off-by-one.
; CHECK-NEXT: <NAME
-; RUN: llvm-as <%s | llvm-bcanalyzer -dump | FileCheck %s
+; RUN: llvm-as <%s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s
; Check that nodes are emitted in post-order to minimize the need for temporary
; nodes. The graph structure is designed to foil naive implementations of
; iteratitive post-order traersals: the leaves, !3 and !4, are reachable from
; CHECK-NEXT: 'leaf
; CHECK-NEXT: }
+; Before the records we emit an offset to the index for the block
+; CHECK-NEXT: <INDEX_OFFSET
+
; The leafs should come first (in either order).
; CHECK-NEXT: <NODE op0=1/>
; CHECK-NEXT: <NODE op0=2/>
; CHECK-NEXT: <NODE op0=3 op1=5 op2=4/>
!6 = !{!3, !5, !4}
+; Before the named records we emit the index containing the position of the
+; previously emitted records
+; CHECK-NEXT: <INDEX {{.*}} (offset match)
+
; Note: named metadata nodes are not cannot reference null so their operands
; are numbered off-by-one.
; CHECK-NEXT: <NAME
-; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s
+; RUN: llvm-as < %s -bitcode-mdindex-threshold=0 | llvm-bcanalyzer -dump | FileCheck %s
; Test that metadata only used by a single function is serialized in that
; function instead of in the global pool.
;
; Each node gets a new number. Bottom-up traversal of nodes.
!named = !{!6}
+; Before the records we emit an offset to the index for the block
+; CHECK-NEXT: <INDEX_OFFSET
+
; CHECK-NEXT: <NODE op0=1/>
!4 = !{!"named"}
STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
STRINGIFY_CODE(METADATA, MODULE)
+ STRINGIFY_CODE(METADATA, INDEX_OFFSET)
+ STRINGIFY_CODE(METADATA, INDEX)
}
case bitc::METADATA_KIND_BLOCK_ID:
switch (CodeID) {
SmallVector<uint64_t, 64> Record;
+ // Keep the offset to the metadata index if seen.
+ uint64_t MetadataIndexOffset = 0;
+
// Read all the records for this block.
while (1) {
if (Stream.AtEndOfStream())
for (unsigned i = 0, e = Record.size(); i != e; ++i)
outs() << " op" << i << "=" << (int64_t)Record[i];
+ // If we found a metadata index, let's verify that we had an offset before
+ // and validate its forward reference offset was correct!
+ if (BlockID == bitc::METADATA_BLOCK_ID) {
+ if (Code == bitc::METADATA_INDEX_OFFSET) {
+ MetadataIndexOffset = Stream.GetCurrentBitNo() + Record[0];
+ }
+ if (Code == bitc::METADATA_INDEX) {
+ outs() << " (offset ";
+ if (MetadataIndexOffset == RecordStartBit)
+ outs() << "match)";
+ else
+ outs() << "mismatch: " << MetadataIndexOffset << " vs "
+ << RecordStartBit << ")";
+ }
+ }
+
// If we found a module hash, let's verify that it matches!
if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {
if (Record.size() != 5)