IR: Allow metadata attachments on declarations, and fix lazy loaded metadata issue with globals.

This change is motivated by an upcoming change to the metadata representation
used for CFI. The indirect function call checker needs type information for
external function declarations in order to correctly generate jump table
entries for such declarations. We currently associate such type information
with declarations using a global metadata node, but I plan [1] to move all
such metadata to global object attachments.

In bitcode, metadata attachments for function declarations appear in the
global metadata block. This seems reasonable to me because I expect metadata
attachments on declarations to be uncommon. In the long term I'd also expect
this to be the case for CFI, because we'd want to use some specialized bitcode
format for this metadata that could be read as part of the ThinLTO thin-link
phase, which would mean that it would not appear in the global metadata block.

To solve the lazy-loaded metadata issue I was seeing with D20147, I use the
same bitcode representation for metadata attachments for global variables as I
do for function declarations. Since there's a use case for metadata attachments
in the global metadata block, we might as well use that representation for
global variables as well, at least until we have a mechanism for lazily loading
global variables.

In the assembly format, the metadata attachments appear after the "declare"
keyword in order to avoid a parsing ambiguity.

[1] http://lists.llvm.org/pipermail/llvm-dev/2016-June/100462.html

Differential Revision: http://reviews.llvm.org/D21052

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273336 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Peter Collingbourne 2016-06-21 23:42:48 +00:00
parent 3071cc9727
commit 99e2e27b37
15 changed files with 162 additions and 94 deletions

View File

@ -862,16 +862,6 @@ be one ``GCNAME`` record for each garbage collector name referenced in function
``gc`` attributes within the module. These records can be referenced by 1-based
index in the *gc* fields of ``FUNCTION`` records.
MODULE_CODE_GLOBALVAR_ATTACHMENT Record
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``[GLOBALVAR_ATTACHMENT, valueid, n x [id, mdnode]]``
The ``GLOBALVAR_ATTACHMENT`` record (code 19) describes the metadata
attachments for a global variable. The ``valueid`` is the value index for
the global variable, and the remaining fields are pairs of metadata name
indices and metadata node indices.
.. _PARAMATTR_BLOCK:
PARAMATTR_BLOCK Contents

View File

@ -113,9 +113,6 @@ enum ModuleCodes {
// IFUNC: [ifunc value type, addrspace, resolver val#, linkage, visibility]
MODULE_CODE_IFUNC = 18,
// GLOBALVAR_ATTACHMENT: [valueid, n x [id, mdnode]]
MODULE_CODE_GLOBALVAR_ATTACHMENT = 19,
};
/// PARAMATTR blocks have code for defining a parameter attribute set.
@ -260,6 +257,7 @@ enum MetadataCodes {
METADATA_MACRO = 33, // [distinct, macinfo, line, name, value]
METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...]
METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars])
METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]]
};
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each

View File

@ -397,8 +397,21 @@ bool LLParser::ParseDeclare() {
assert(Lex.getKind() == lltok::kw_declare);
Lex.Lex();
std::vector<std::pair<unsigned, MDNode *>> MDs;
while (Lex.getKind() == lltok::MetadataVar) {
unsigned MDK;
MDNode *N;
if (ParseMetadataAttachment(MDK, N))
return true;
MDs.push_back({MDK, N});
}
Function *F;
return ParseFunctionHeader(F, false);
if (ParseFunctionHeader(F, false))
return true;
for (auto &MD : MDs)
F->addMetadata(MD.first, *MD.second);
return false;
}
/// toplevelentity

View File

@ -2692,6 +2692,16 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
parseMetadataStrings(Record, Blob, NextMetadataNo))
return EC;
break;
case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
if (Record.size() % 2 == 0)
return error("Invalid record");
unsigned ValueID = Record[0];
if (ValueID >= ValueList.size())
return error("Invalid record");
if (auto *GO = dyn_cast<GlobalObject>(ValueList[ValueID]))
parseGlobalObjectAttachment(*GO, ArrayRef<uint64_t>(Record).slice(1));
break;
}
case bitc::METADATA_KIND: {
// Support older bitcode files that had METADATA_KIND records in a
// block with METADATA_BLOCK_ID.
@ -3840,16 +3850,6 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
break;
}
case bitc::MODULE_CODE_GLOBALVAR_ATTACHMENT: {
if (Record.size() % 2 == 0)
return error("Invalid record");
unsigned ValueID = Record[0];
if (ValueID >= ValueList.size())
return error("Invalid record");
if (auto *GV = dyn_cast<GlobalVariable>(ValueList[ValueID]))
parseGlobalObjectAttachment(*GV, ArrayRef<uint64_t>(Record).slice(1));
break;
}
// FUNCTION: [type, callingconv, isproto, linkage, paramattr,
// alignment, section, visibility, gc, unnamed_addr,
// prologuedata, dllstorageclass, comdat, prefixdata]

View File

@ -227,7 +227,7 @@ private:
void writeGlobalVariableMetadataAttachment(const GlobalVariable &GV);
void pushGlobalMetadataAttachment(SmallVectorImpl<uint64_t> &Record,
const GlobalObject &GO);
void writeModuleMetadataStore();
void writeModuleMetadataKinds();
void writeOperandBundleTags();
void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal);
void writeModuleConstants();
@ -1832,6 +1832,22 @@ void ModuleBitcodeWriter::writeModuleMetadata() {
writeMetadataStrings(VE.getMDStrings(), Record);
writeMetadataRecords(VE.getNonMDStrings(), Record);
writeNamedMetadata(Record);
auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) {
SmallVector<uint64_t, 4> Record;
Record.push_back(VE.getValueID(&GO));
pushGlobalMetadataAttachment(Record, GO);
Stream.EmitRecord(bitc::METADATA_GLOBAL_DECL_ATTACHMENT, Record);
};
for (const Function &F : M)
if (F.isDeclaration() && F.hasMetadata())
AddDeclAttachedMetadata(F);
// FIXME: Only store metadata for declarations here, and move data for global
// variable definitions to a separate block (PR28134).
for (const GlobalVariable &GV : M.globals())
if (GV.hasMetadata())
AddDeclAttachedMetadata(GV);
Stream.ExitBlock();
}
@ -1892,7 +1908,7 @@ void ModuleBitcodeWriter::writeFunctionMetadataAttachment(const Function &F) {
Stream.ExitBlock();
}
void ModuleBitcodeWriter::writeModuleMetadataStore() {
void ModuleBitcodeWriter::writeModuleMetadataKinds() {
SmallVector<uint64_t, 64> Record;
// Write metadata kinds
@ -3593,11 +3609,11 @@ void ModuleBitcodeWriter::writeModule() {
// Emit constants.
writeModuleConstants();
// Emit metadata.
writeModuleMetadata();
// Emit metadata kind names.
writeModuleMetadataKinds();
// Emit metadata.
writeModuleMetadataStore();
writeModuleMetadata();
// Emit module-level use-lists.
if (VE.shouldPreserveUseListOrder())
@ -3619,14 +3635,6 @@ void ModuleBitcodeWriter::writeModule() {
writeValueSymbolTable(M.getValueSymbolTable(),
/* IsModuleLevel */ true, &FunctionToBitcodeIndex);
for (const GlobalVariable &GV : M.globals())
if (GV.hasMetadata()) {
SmallVector<uint64_t, 4> Record;
Record.push_back(VE.getValueID(&GV));
pushGlobalMetadataAttachment(Record, GV);
Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR_ATTACHMENT, Record);
}
if (GenerateHash) {
writeModuleHash(BlockStartPos);
}

View File

@ -348,7 +348,10 @@ ValueEnumerator::ValueEnumerator(const Module &M,
MDs.clear();
GV.getAllMetadata(MDs);
for (const auto &I : MDs)
EnumerateMetadata(&GV, I.second);
// FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer
// to write metadata to the global variable's own metadata block
// (PR28134).
EnumerateMetadata(nullptr, I.second);
}
// Enumerate types used by function bodies and argument lists.
@ -360,7 +363,7 @@ ValueEnumerator::ValueEnumerator(const Module &M,
MDs.clear();
F.getAllMetadata(MDs);
for (const auto &I : MDs)
EnumerateMetadata(&F, I.second);
EnumerateMetadata(F.isDeclaration() ? nullptr : &F, I.second);
for (const BasicBlock &BB : F)
for (const Instruction &I : BB) {
@ -530,18 +533,17 @@ void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
EnumerateMetadata(nullptr, MD->getOperand(i));
}
unsigned ValueEnumerator::getMetadataGlobalID(const GlobalObject *GO) const {
return GO ? getValueID(GO) + 1 : 0;
unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const {
return F ? getValueID(F) + 1 : 0;
}
void ValueEnumerator::EnumerateMetadata(const GlobalObject *GO,
const Metadata *MD) {
EnumerateMetadata(getMetadataGlobalID(GO), MD);
void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) {
EnumerateMetadata(getMetadataFunctionID(F), MD);
}
void ValueEnumerator::EnumerateFunctionLocalMetadata(
const Function &F, const LocalAsMetadata *Local) {
EnumerateFunctionLocalMetadata(getMetadataGlobalID(&F), Local);
EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local);
}
void ValueEnumerator::dropFunctionFromMetadata(

View File

@ -255,7 +255,7 @@ private:
/// it's an \a MDNode.
const MDNode *enumerateMetadataImpl(unsigned F, const Metadata *MD);
unsigned getMetadataGlobalID(const GlobalObject *GO) const;
unsigned getMetadataFunctionID(const Function *F) const;
/// Enumerate reachable metadata in (almost) post-order.
///
@ -272,7 +272,7 @@ private:
/// \a organizeMetadata() will later partition distinct nodes ahead of
/// uniqued ones.
///{
void EnumerateMetadata(const GlobalObject *GO, const Metadata *MD);
void EnumerateMetadata(const Function *F, const Metadata *MD);
void EnumerateMetadata(unsigned F, const Metadata *MD);
///}

View File

@ -2616,9 +2616,15 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << "; Function Attrs: " << AttrStr << '\n';
}
if (F->isDeclaration())
Out << "declare ";
else
Machine.incorporateFunction(F);
if (F->isDeclaration()) {
Out << "declare";
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
F->getAllMetadata(MDs);
printMetadataAttachments(MDs, " ");
Out << ' ';
} else
Out << "define ";
Out << getLinkagePrintName(F->getLinkage());
@ -2638,7 +2644,6 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << ' ';
WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
Out << '(';
Machine.incorporateFunction(F);
// Loop over the arguments, printing them...
if (F->isDeclaration() && !IsForDebug) {
@ -2698,13 +2703,13 @@ void AssemblyWriter::printFunction(const Function *F) {
writeOperand(F->getPersonalityFn(), /*PrintType=*/true);
}
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
F->getAllMetadata(MDs);
printMetadataAttachments(MDs, " ");
if (F->isDeclaration()) {
Out << '\n';
} else {
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
F->getAllMetadata(MDs);
printMetadataAttachments(MDs, " ");
Out << " {";
// Output all of the function's basic blocks.
for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)

View File

@ -1956,8 +1956,15 @@ void Verifier::visitFunction(const Function &F) {
Assert(MDs.empty(), "unmaterialized function cannot have metadata", &F,
MDs.empty() ? nullptr : MDs.front().second);
} else if (F.isDeclaration()) {
Assert(MDs.empty(), "function without a body cannot have metadata", &F,
MDs.empty() ? nullptr : MDs.front().second);
for (const auto &I : MDs) {
AssertDI(I.first != LLVMContext::MD_dbg,
"function declaration may not have a !dbg attachment", &F);
Assert(I.first != LLVMContext::MD_prof,
"function declaration may not have a !prof attachment", &F);
// Verify the metadata itself.
visitMDNode(*I.second);
}
Assert(!F.hasPersonalityFn(),
"Function declaration shouldn't have a personality routine", &F);
} else {

View File

@ -0,0 +1,11 @@
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
; RUN: llvm-as < %s | llvm-dis -materialize-metadata | FileCheck %s
; CHECK: @foo = external global i32, !foo !0
@foo = external global i32, !foo !0
; CHECK: declare !bar !1 void @bar()
declare !bar !1 void @bar()
!0 = distinct !{}
!1 = distinct !{}

View File

@ -1,7 +1,8 @@
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck --check-prefix=CHECK --check-prefix=CHECK-UNMAT %s
; RUN: llvm-as < %s | llvm-dis -materialize-metadata | FileCheck --check-prefix=CHECK-UNMAT %s
; RUN: verify-uselistorder %s
; CHECK: @global = global i32 0, !foo [[M2:![0-9]+]], !foo [[M3:![0-9]+]], !baz [[M3]]
; CHECK-UNMAT: @global = global i32 0, !foo [[M2:![0-9]+]], !foo [[M3:![0-9]+]], !baz [[M3]]
@global = global i32 0, !foo !2, !foo !3, !baz !3
; CHECK-LABEL: @test
@ -32,8 +33,8 @@ define void @test_attachment_name() {
unreachable, !\34\32abc !4
}
; CHECK: [[M2]] = distinct !{}
; CHECK: [[M3]] = distinct !{}
; CHECK-UNMAT: [[M2]] = distinct !{}
; CHECK-UNMAT: [[M3]] = distinct !{}
; CHECK: [[M0]] = !DILocation
; CHECK: [[M1]] = distinct !DISubprogram
; CHECK: [[M4]] = distinct !{}

View File

@ -1,11 +1,14 @@
; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s
define void @foo() !dbg !4 {
; CHECK: function declaration may not have a !dbg attachment
declare !dbg !4 void @f1()
define void @f2() !dbg !4 {
unreachable
}
; CHECK: function must have a single !dbg attachment
define void @foo2() !dbg !4 !dbg !4 {
define void @f3() !dbg !4 !dbg !4 {
unreachable
}

View File

@ -1,11 +1,14 @@
; RUN: not llvm-as %s -disable-output 2>&1 | FileCheck %s
define void @foo() !prof !0 {
; CHECK: function declaration may not have a !prof attachment
declare !prof !0 void @f1()
define void @f2() !prof !0 {
unreachable
}
; CHECK: function must have a single !prof attachment
define void @foo2() !prof !0 !prof !0 {
define void @f3() !prof !0 !prof !0 {
unreachable
}

View File

@ -27,6 +27,7 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DataStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
@ -59,6 +60,11 @@ static cl::opt<bool> PreserveAssemblyUseListOrder(
cl::desc("Preserve use-list order when writing LLVM assembly."),
cl::init(false), cl::Hidden);
// -materialize-metadata: selects the input path used by openInputFile(). When
// set, the module is opened lazily with metadata loading deferred and then
// only materializeMetadata() is called on it; when unset, the module is read
// through the streaming interface and fully materialized.
static cl::opt<bool>
MaterializeMetadata("materialize-metadata",
cl::desc("Load module without materializing metadata, "
"then materialize only the metadata"));
namespace {
static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) {
@ -132,6 +138,37 @@ static void diagnosticHandler(const DiagnosticInfo &DI, void *Context) {
exit(1);
}
/// Open the bitcode input named by InputFilename and return the parsed module.
///
/// Two loading strategies are supported, selected by -materialize-metadata:
///  * set:   read the file (or stdin) into a memory buffer, build a lazy
///           module with metadata loading deferred, then materialize only the
///           metadata;
///  * unset: read the input through the bitcode streaming interface and
///           materialize the whole module.
///
/// \param Context  the LLVMContext the module is created in.
/// \returns the module on success, or an Error describing why the input could
///          not be opened or parsed.
static Expected<std::unique_ptr<Module>> openInputFile(LLVMContext &Context) {
  if (MaterializeMetadata) {
    ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
        MemoryBuffer::getFileOrSTDIN(InputFilename);
    if (!MBOrErr)
      return errorCodeToError(MBOrErr.getError());

    ErrorOr<std::unique_ptr<Module>> MOrErr =
        getLazyBitcodeModule(std::move(*MBOrErr), Context,
                             /*ShouldLazyLoadMetadata=*/true);
    if (!MOrErr)
      return errorCodeToError(MOrErr.getError());

    // NOTE(review): any status reported by materializeMetadata() is not
    // inspected here; confirm whether failures should be propagated.
    (*MOrErr)->materializeMetadata();
    return std::move(*MOrErr);
  }

  // Default path: use the bitcode streaming interface.
  std::string ErrorMessage;
  std::unique_ptr<DataStreamer> Streamer =
      getDataFileStreamer(InputFilename, &ErrorMessage);
  if (!Streamer)
    return make_error<StringError>(ErrorMessage, inconvertibleErrorCode());

  // Report "<stdin>" instead of "-" as the module's display name.
  std::string DisplayFilename;
  if (InputFilename == "-")
    DisplayFilename = "<stdin>";
  else
    DisplayFilename = InputFilename;

  ErrorOr<std::unique_ptr<Module>> MOrErr =
      getStreamedBitcodeModule(DisplayFilename, std::move(Streamer), Context);
  // Check the result before dereferencing it, mirroring the lazy-load path
  // above; an ErrorOr holding an error must not be dereferenced.
  if (!MOrErr)
    return errorCodeToError(MOrErr.getError());

  // NOTE(review): any status reported by materializeAll() is not inspected
  // here; confirm whether failures should be propagated.
  (*MOrErr)->materializeAll();
  return std::move(*MOrErr);
}
int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal(argv[0]);
@ -144,26 +181,16 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n");
std::string ErrorMessage;
std::unique_ptr<Module> M;
// Use the bitcode streaming interface
std::unique_ptr<DataStreamer> Streamer =
getDataFileStreamer(InputFilename, &ErrorMessage);
if (Streamer) {
std::string DisplayFilename;
if (InputFilename == "-")
DisplayFilename = "<stdin>";
else
DisplayFilename = InputFilename;
ErrorOr<std::unique_ptr<Module>> MOrErr =
getStreamedBitcodeModule(DisplayFilename, std::move(Streamer), Context);
M = std::move(*MOrErr);
M->materializeAll();
} else {
errs() << argv[0] << ": " << ErrorMessage << '\n';
Expected<std::unique_ptr<Module>> MOrErr = openInputFile(Context);
if (!MOrErr) {
handleAllErrors(MOrErr.takeError(), [&](ErrorInfoBase &EIB) {
errs() << argv[0] << ": ";
EIB.log(errs());
errs() << '\n';
});
return 1;
}
std::unique_ptr<Module> M = std::move(*MOrErr);
// Just use stdout. We won't actually print anything on it.
if (DontPrint)

View File

@ -2260,20 +2260,20 @@ TEST_F(FunctionAttachmentTest, getAll) {
TEST_F(FunctionAttachmentTest, Verifier) {
Function *F = getFunction("foo");
F->setMetadata("attach", getTuple());
// Confirm this has no body.
ASSERT_TRUE(F->empty());
// Functions without a body cannot have metadata attachments (they also can't
// be verified directly, so check that the module fails to verify).
EXPECT_TRUE(verifyModule(*F->getParent()));
// Nor can materializable functions.
F->setIsMaterializable(true);
EXPECT_TRUE(verifyModule(*F->getParent()));
// Functions with a body can.
// Confirm this is materializable.
ASSERT_TRUE(F->isMaterializable());
// Materializable functions cannot have metadata attachments.
EXPECT_TRUE(verifyFunction(*F));
// Function declarations can.
F->setIsMaterializable(false);
EXPECT_FALSE(verifyModule(*F->getParent()));
EXPECT_FALSE(verifyFunction(*F));
// So can definitions.
(void)new UnreachableInst(Context, BasicBlock::Create(Context, "bb", F));
EXPECT_FALSE(verifyModule(*F->getParent()));
EXPECT_FALSE(verifyFunction(*F));