From: Peter Collingbourne Date: Tue, 21 Jun 2016 23:42:48 +0000 (+0000) Subject: IR: Allow metadata attachments on declarations, and fix lazy loaded metadata issue... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=99e2e27b37142953f163c33c85203cfa3aa4687d;p=llvm IR: Allow metadata attachments on declarations, and fix lazy loaded metadata issue with globals. This change is motivated by an upcoming change to the metadata representation used for CFI. The indirect function call checker needs type information for external function declarations in order to correctly generate jump table entries for such declarations. We currently associate such type information with declarations using a global metadata node, but I plan [1] to move all such metadata to global object attachments. In bitcode, metadata attachments for function declarations appear in the global metadata block. This seems reasonable to me because I expect metadata attachments on declarations to be uncommon. In the long term I'd also expect this to be the case for CFI, because we'd want to use some specialized bitcode format for this metadata that could be read as part of the ThinLTO thin-link phase, which would mean that it would not appear in the global metadata block. To solve the lazy loaded metadata issue I was seeing with D20147, I use the same bitcode representation for metadata attachments for global variables as I do for function declarations. Since there's a use case for metadata attachments in the global metadata block, we might as well use that representation for global variables as well, at least until we have a mechanism for lazy loading global variables. In the assembly format, the metadata attachments appear after the "declare" keyword in order to avoid a parsing ambiguity. [1] http://lists.llvm.org/pipermail/llvm-dev/2016-June/100462.html Differential Revision: http://reviews.llvm.org/D21052 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273336 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index f9989936a1f..ffa21763252 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -862,16 +862,6 @@ be one ``GCNAME`` record for each garbage collector name referenced in function ``gc`` attributes within the module. These records can be referenced by 1-based index in the *gc* fields of ``FUNCTION`` records. -MODULE_CODE_GLOBALVAR_ATTACHMENT Record -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -``[GLOBALVAR_ATTACHMENT, valueid, n x [id, mdnode]]`` - -The ``GLOBALVAR_ATTACHMENT`` record (code 19) describes the metadata -attachments for a global variable. The ``valueid`` is the value index for -the global variable, and the remaining fields are pairs of metadata name -indices and metadata node indices. - .. _PARAMATTR_BLOCK: PARAMATTR_BLOCK Contents diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index ce07f122d5f..d2a6a195411 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -113,9 +113,6 @@ enum ModuleCodes { // IFUNC: [ifunc value type, addrspace, resolver val#, linkage, visibility] MODULE_CODE_IFUNC = 18, - - // GLOBALVAR_ATTACHMENT: [valueid, n x [id, mdnode]] - MODULE_CODE_GLOBALVAR_ATTACHMENT = 19, }; /// PARAMATTR blocks have code for defining a parameter attribute set. @@ -260,6 +257,7 @@ enum MetadataCodes { METADATA_MACRO = 33, // [distinct, macinfo, line, name, value] METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars]) + METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 2725386f8da..691625209e4 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -397,8 +397,21 @@ bool LLParser::ParseDeclare() { assert(Lex.getKind() == lltok::kw_declare); Lex.Lex(); + std::vector> MDs; + while (Lex.getKind() == lltok::MetadataVar) { + unsigned MDK; + MDNode *N; + if (ParseMetadataAttachment(MDK, N)) + return true; + MDs.push_back({MDK, N}); + } + Function *F; - return ParseFunctionHeader(F, false); + if (ParseFunctionHeader(F, false)) + return true; + for (auto &MD : MDs) + F->addMetadata(MD.first, *MD.second); + return false; } /// toplevelentity diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index ac534f35bb4..a89bb07f052 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2692,6 +2692,16 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { parseMetadataStrings(Record, Blob, NextMetadataNo)) return EC; break; + case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: { + if (Record.size() % 2 == 0) + return error("Invalid record"); + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size()) + return error("Invalid record"); + if (auto *GO = dyn_cast(ValueList[ValueID])) + parseGlobalObjectAttachment(*GO, ArrayRef(Record).slice(1)); + break; + } case bitc::METADATA_KIND: { // Support older bitcode files that had METADATA_KIND records in a // block with METADATA_BLOCK_ID. @@ -3840,16 +3850,6 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit, break; } - case bitc::MODULE_CODE_GLOBALVAR_ATTACHMENT: { - if (Record.size() % 2 == 0) - return error("Invalid record"); - unsigned ValueID = Record[0]; - if (ValueID >= ValueList.size()) - return error("Invalid record"); - if (auto *GV = dyn_cast(ValueList[ValueID])) - parseGlobalObjectAttachment(*GV, ArrayRef(Record).slice(1)); - break; - } // FUNCTION: [type, callingconv, isproto, linkage, paramattr, // alignment, section, visibility, gc, unnamed_addr, // prologuedata, dllstorageclass, comdat, prefixdata] diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 4699e7dac77..20cbc2bf4b6 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -227,7 +227,7 @@ private: void writeGlobalVariableMetadataAttachment(const GlobalVariable &GV); void pushGlobalMetadataAttachment(SmallVectorImpl &Record, const GlobalObject &GO); - void writeModuleMetadataStore(); + void writeModuleMetadataKinds(); void writeOperandBundleTags(); void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal); void writeModuleConstants(); @@ -1832,6 +1832,22 @@ void ModuleBitcodeWriter::writeModuleMetadata() { writeMetadataStrings(VE.getMDStrings(), Record); writeMetadataRecords(VE.getNonMDStrings(), Record); writeNamedMetadata(Record); + + auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) { + SmallVector Record; + Record.push_back(VE.getValueID(&GO)); + pushGlobalMetadataAttachment(Record, GO); + Stream.EmitRecord(bitc::METADATA_GLOBAL_DECL_ATTACHMENT, Record); + }; + for (const Function &F : M) + if (F.isDeclaration() && F.hasMetadata()) + AddDeclAttachedMetadata(F); + // FIXME: Only store metadata for declarations here, and move data for global + // variable definitions to a separate block (PR28134). + for (const GlobalVariable &GV : M.globals()) + if (GV.hasMetadata()) + AddDeclAttachedMetadata(GV); + Stream.ExitBlock(); } @@ -1892,7 +1908,7 @@ void ModuleBitcodeWriter::writeFunctionMetadataAttachment(const Function &F) { Stream.ExitBlock(); } -void ModuleBitcodeWriter::writeModuleMetadataStore() { +void ModuleBitcodeWriter::writeModuleMetadataKinds() { SmallVector Record; // Write metadata kinds @@ -3593,11 +3609,11 @@ void ModuleBitcodeWriter::writeModule() { // Emit constants. writeModuleConstants(); - // Emit metadata. - writeModuleMetadata(); + // Emit metadata kind names. + writeModuleMetadataKinds(); // Emit metadata. - writeModuleMetadataStore(); + writeModuleMetadata(); // Emit module-level use-lists. if (VE.shouldPreserveUseListOrder()) @@ -3619,14 +3635,6 @@ void ModuleBitcodeWriter::writeModule() { writeValueSymbolTable(M.getValueSymbolTable(), /* IsModuleLevel */ true, &FunctionToBitcodeIndex); - for (const GlobalVariable &GV : M.globals()) - if (GV.hasMetadata()) { - SmallVector Record; - Record.push_back(VE.getValueID(&GV)); - pushGlobalMetadataAttachment(Record, GV); - Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR_ATTACHMENT, Record); - } - if (GenerateHash) { writeModuleHash(BlockStartPos); } diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 98eb7faf224..398f7d7d00a 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -348,7 +348,10 @@ ValueEnumerator::ValueEnumerator(const Module &M, MDs.clear(); GV.getAllMetadata(MDs); for (const auto &I : MDs) - EnumerateMetadata(&GV, I.second); + // FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer + // to write metadata to the global variable's own metadata block + // (PR28134). + EnumerateMetadata(nullptr, I.second); } // Enumerate types used by function bodies and argument lists. @@ -360,7 +363,7 @@ ValueEnumerator::ValueEnumerator(const Module &M, MDs.clear(); F.getAllMetadata(MDs); for (const auto &I : MDs) - EnumerateMetadata(&F, I.second); + EnumerateMetadata(F.isDeclaration() ? nullptr : &F, I.second); for (const BasicBlock &BB : F) for (const Instruction &I : BB) { @@ -530,18 +533,17 @@ void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { EnumerateMetadata(nullptr, MD->getOperand(i)); } -unsigned ValueEnumerator::getMetadataGlobalID(const GlobalObject *GO) const { - return GO ? getValueID(GO) + 1 : 0; +unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const { + return F ? getValueID(F) + 1 : 0; } -void ValueEnumerator::EnumerateMetadata(const GlobalObject *GO, - const Metadata *MD) { - EnumerateMetadata(getMetadataGlobalID(GO), MD); +void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) { + EnumerateMetadata(getMetadataFunctionID(F), MD); } void ValueEnumerator::EnumerateFunctionLocalMetadata( const Function &F, const LocalAsMetadata *Local) { - EnumerateFunctionLocalMetadata(getMetadataGlobalID(&F), Local); + EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local); } void ValueEnumerator::dropFunctionFromMetadata( diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 34d33fc418b..bff2de70b3e 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -255,7 +255,7 @@ private: /// it's an \a MDNode. const MDNode *enumerateMetadataImpl(unsigned F, const Metadata *MD); - unsigned getMetadataGlobalID(const GlobalObject *GO) const; + unsigned getMetadataFunctionID(const Function *F) const; /// Enumerate reachable metadata in (almost) post-order. /// @@ -272,7 +272,7 @@ private: /// \a organizeMetadata() will later partition distinct nodes ahead of /// uniqued ones. ///{ - void EnumerateMetadata(const GlobalObject *GO, const Metadata *MD); + void EnumerateMetadata(const Function *F, const Metadata *MD); void EnumerateMetadata(unsigned F, const Metadata *MD); ///} diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 3404ae7dc7f..17b622458c4 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -2616,9 +2616,15 @@ void AssemblyWriter::printFunction(const Function *F) { Out << "; Function Attrs: " << AttrStr << '\n'; } - if (F->isDeclaration()) - Out << "declare "; - else + Machine.incorporateFunction(F); + + if (F->isDeclaration()) { + Out << "declare"; + SmallVector, 4> MDs; + F->getAllMetadata(MDs); + printMetadataAttachments(MDs, " "); + Out << ' '; + } else Out << "define "; Out << getLinkagePrintName(F->getLinkage()); @@ -2638,7 +2644,6 @@ void AssemblyWriter::printFunction(const Function *F) { Out << ' '; WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent()); Out << '('; - Machine.incorporateFunction(F); // Loop over the arguments, printing them... if (F->isDeclaration() && !IsForDebug) { @@ -2698,13 +2703,13 @@ void AssemblyWriter::printFunction(const Function *F) { writeOperand(F->getPersonalityFn(), /*PrintType=*/true); } - SmallVector, 4> MDs; - F->getAllMetadata(MDs); - printMetadataAttachments(MDs, " "); - if (F->isDeclaration()) { Out << '\n'; } else { + SmallVector, 4> MDs; + F->getAllMetadata(MDs); + printMetadataAttachments(MDs, " "); + Out << " {"; // Output all of the function's basic blocks. for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index ecba8be7eb8..7297d658bb9 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -1956,8 +1956,15 @@ void Verifier::visitFunction(const Function &F) { Assert(MDs.empty(), "unmaterialized function cannot have metadata", &F, MDs.empty() ? nullptr : MDs.front().second); } else if (F.isDeclaration()) { - Assert(MDs.empty(), "function without a body cannot have metadata", &F, - MDs.empty() ? nullptr : MDs.front().second); + for (const auto &I : MDs) { + AssertDI(I.first != LLVMContext::MD_dbg, + "function declaration may not have a !dbg attachment", &F); + Assert(I.first != LLVMContext::MD_prof, + "function declaration may not have a !prof attachment", &F); + + // Verify the metadata itself. + visitMDNode(*I.second); + } Assert(!F.hasPersonalityFn(), "Function declaration shouldn't have a personality routine", &F); } else { diff --git a/test/Assembler/metadata-decl.ll b/test/Assembler/metadata-decl.ll new file mode 100644 index 00000000000..4f28638fd0f --- /dev/null +++ b/test/Assembler/metadata-decl.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s +; RUN: llvm-as < %s | llvm-dis -materialize-metadata | FileCheck %s + +; CHECK: @foo = external global i32, !foo !0 +@foo = external global i32, !foo !0 + +; CHECK: declare !bar !1 void @bar() +declare !bar !1 void @bar() + +!0 = distinct !{} +!1 = distinct !{} diff --git a/test/Assembler/metadata.ll b/test/Assembler/metadata.ll index a4b9c8af41d..5b62bfafa6d 100644 --- a/test/Assembler/metadata.ll +++ b/test/Assembler/metadata.ll @@ -1,7 +1,8 @@ -; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck --check-prefix=CHECK --check-prefix=CHECK-UNMAT %s +; RUN: llvm-as < %s | llvm-dis -materialize-metadata | FileCheck --check-prefix=CHECK-UNMAT %s ; RUN: verify-uselistorder %s -; CHECK: @global = global i32 0, !foo [[M2:![0-9]+]], !foo [[M3:![0-9]+]], !baz [[M3]] +; CHECK-UNMAT: @global = global i32 0, !foo [[M2:![0-9]+]], !foo [[M3:![0-9]+]], !baz [[M3]] @global = global i32 0, !foo !2, !foo !3, !baz !3 ; CHECK-LABEL: @test @@ -32,8 +33,8 @@ define void @test_attachment_name() { unreachable, !\34\32abc !4 } -; CHECK: [[M2]] = distinct !{} -; CHECK: [[M3]] = distinct !{} +; CHECK-UNMAT: [[M2]] = distinct !{} +; CHECK-UNMAT: [[M3]] = distinct !{} ; CHECK: [[M0]] = !DILocation ; CHECK: [[M1]] = distinct !DISubprogram ; CHECK: [[M4]] = distinct !{} diff --git a/test/Verifier/metadata-function-dbg.ll b/test/Verifier/metadata-function-dbg.ll index 77f7de26c87..24989ed7aa2 100644 --- a/test/Verifier/metadata-function-dbg.ll +++ b/test/Verifier/metadata-function-dbg.ll @@ -1,11 +1,14 @@ ; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s -define void @foo() !dbg !4 { +; CHECK: function declaration may not have a !dbg attachment +declare !dbg !4 void @f1() + +define void @f2() !dbg !4 { unreachable } ; CHECK: function must have a single !dbg attachment -define void @foo2() !dbg !4 !dbg !4 { +define void @f3() !dbg !4 !dbg !4 { unreachable } diff --git a/test/Verifier/metadata-function-prof.ll b/test/Verifier/metadata-function-prof.ll index ca0628f44f8..d84a7fe5440 100644 --- a/test/Verifier/metadata-function-prof.ll +++ b/test/Verifier/metadata-function-prof.ll @@ -1,11 +1,14 @@ ; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s -define void @foo() !prof !0 { +; CHECK: function declaration may not have a !prof attachment +declare !prof !0 void @f1() + +define void @f2() !prof !0 { unreachable } ; CHECK: function must have a single !prof attachment -define void @foo2() !prof !0 !prof !0 { +define void @f3() !prof !0 !prof !0 { unreachable } diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp index 46892b6731a..88333aeb688 100644 --- a/tools/llvm-dis/llvm-dis.cpp +++ b/tools/llvm-dis/llvm-dis.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataStream.h" +#include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/ManagedStatic.h" @@ -59,6 +60,11 @@ static cl::opt PreserveAssemblyUseListOrder( cl::desc("Preserve use-list order when writing LLVM assembly."), cl::init(false), cl::Hidden); +static cl::opt + MaterializeMetadata("materialize-metadata", + cl::desc("Load module without materializing metadata, " + "then materialize only the metadata")); + namespace { static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { @@ -132,6 +138,37 @@ static void diagnosticHandler(const DiagnosticInfo &DI, void *Context) { exit(1); } +static Expected> openInputFile(LLVMContext &Context) { + if (MaterializeMetadata) { + ErrorOr> MBOrErr = + MemoryBuffer::getFileOrSTDIN(InputFilename); + if (!MBOrErr) + return errorCodeToError(MBOrErr.getError()); + ErrorOr> MOrErr = + getLazyBitcodeModule(std::move(*MBOrErr), Context, + /*ShouldLazyLoadMetadata=*/true); + if (!MOrErr) + return errorCodeToError(MOrErr.getError()); + (*MOrErr)->materializeMetadata(); + return std::move(*MOrErr); + } else { + std::string ErrorMessage; + std::unique_ptr Streamer = + getDataFileStreamer(InputFilename, &ErrorMessage); + if (!Streamer) + return make_error(ErrorMessage, inconvertibleErrorCode()); + std::string DisplayFilename; + if (InputFilename == "-") + DisplayFilename = ""; + else + DisplayFilename = InputFilename; + ErrorOr> MOrErr = + getStreamedBitcodeModule(DisplayFilename, std::move(Streamer), Context); + (*MOrErr)->materializeAll(); + return std::move(*MOrErr); + } +} + int main(int argc, char **argv) { // Print a stack trace if we signal out. sys::PrintStackTraceOnErrorSignal(argv[0]); @@ -144,26 +181,16 @@ int main(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n"); - std::string ErrorMessage; - std::unique_ptr M; - - // Use the bitcode streaming interface - std::unique_ptr Streamer = - getDataFileStreamer(InputFilename, &ErrorMessage); - if (Streamer) { - std::string DisplayFilename; - if (InputFilename == "-") - DisplayFilename = ""; - else - DisplayFilename = InputFilename; - ErrorOr> MOrErr = - getStreamedBitcodeModule(DisplayFilename, std::move(Streamer), Context); - M = std::move(*MOrErr); - M->materializeAll(); - } else { - errs() << argv[0] << ": " << ErrorMessage << '\n'; + Expected> MOrErr = openInputFile(Context); + if (!MOrErr) { + handleAllErrors(MOrErr.takeError(), [&](ErrorInfoBase &EIB) { + errs() << argv[0] << ": "; + EIB.log(errs()); + errs() << '\n'; + }); return 1; } + std::unique_ptr M = std::move(*MOrErr); // Just use stdout. We won't actually print anything on it. if (DontPrint) diff --git a/unittests/IR/MetadataTest.cpp b/unittests/IR/MetadataTest.cpp index a7a28e041b3..b6cf7e4e1b5 100644 --- a/unittests/IR/MetadataTest.cpp +++ b/unittests/IR/MetadataTest.cpp @@ -2260,20 +2260,20 @@ TEST_F(FunctionAttachmentTest, getAll) { TEST_F(FunctionAttachmentTest, Verifier) { Function *F = getFunction("foo"); F->setMetadata("attach", getTuple()); + F->setIsMaterializable(true); - // Confirm this has no body. - ASSERT_TRUE(F->empty()); - - // Functions without a body cannot have metadata attachments (they also can't - // be verified directly, so check that the module fails to verify). - EXPECT_TRUE(verifyModule(*F->getParent())); + // Confirm this is materializable. + ASSERT_TRUE(F->isMaterializable()); - // Nor can materializable functions. - F->setIsMaterializable(true); - EXPECT_TRUE(verifyModule(*F->getParent())); + // Materializable functions cannot have metadata attachments. + EXPECT_TRUE(verifyFunction(*F)); - // Functions with a body can. + // Function declarations can. F->setIsMaterializable(false); + EXPECT_FALSE(verifyModule(*F->getParent())); + EXPECT_FALSE(verifyFunction(*F)); + + // So can definitions. (void)new UnreachableInst(Context, BasicBlock::Create(Context, "bb", F)); EXPECT_FALSE(verifyModule(*F->getParent())); EXPECT_FALSE(verifyFunction(*F));