Bitcode: emit all MDStrings of a metadata block in one METADATA_STRINGS record

The writer moves every MDString to the front of the metadata ordering and
emits them together as a single record, [count, offset] plus a blob that
holds the VBR6-encoded string lengths followed by the concatenated
characters.  The previous one-record-per-string encoding stays readable
under the name METADATA_STRING_OLD.

The reader accepts an offset equal to the blob size, which is what the
writer produces when every string in the block is empty.  llvm-bcanalyzer
validates the string count and the offset before decoding the blob.

diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index eec62c09c3b..3d0752628c7 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -209,7 +209,7 @@ enum { BITCODE_CURRENT_EPOCH = 0 };
   };
 
   enum MetadataCodes {
-    METADATA_STRING        = 1,   // MDSTRING: [values]
+    METADATA_STRING_OLD    = 1,   // MDSTRING: [values]
     METADATA_VALUE         = 2,   // VALUE: [type num, value num]
     METADATA_NODE          = 3,   // NODE: [n x md num]
     METADATA_NAME          = 4,   // STRING: [values]
@@ -243,6 +243,7 @@ enum { BITCODE_CURRENT_EPOCH = 0 };
     METADATA_MODULE        = 32,  // [distinct, scope, name, ...]
     METADATA_MACRO         = 33,  // [distinct, macinfo, line, name, value]
     METADATA_MACRO_FILE    = 34,  // [distinct, macinfo, line, file, ...]
+    METADATA_STRINGS       = 35,  // [count, offset] blob([lengths][chars])
   };
 
   // The constants block (CONSTANTS_BLOCK_ID) describes emission for each
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index bb479dbcedc..cb2784ff204 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -396,6 +396,9 @@ private:
   std::error_code globalCleanup();
   std::error_code resolveGlobalAndAliasInits();
   std::error_code parseMetadata(bool ModuleLevel = false);
+  std::error_code parseMetadataStrings(ArrayRef<uint64_t> Record,
+                                       StringRef Blob,
+                                       unsigned &NextMetadataNo);
   std::error_code parseMetadataKinds();
   std::error_code parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
   std::error_code parseMetadataAttachment(Function &F);
@@ -1883,6 +1886,49 @@ BitcodeReader::parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record) {
 
 static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
 
+std::error_code BitcodeReader::parseMetadataStrings(ArrayRef<uint64_t> Record,
+                                                    StringRef Blob,
+                                                    unsigned &NextMetadataNo) {
+  // All the MDStrings in the block are emitted together in a single
+  // record. The strings are concatenated and stored in a blob along with
+  // their sizes.
+  if (Record.size() != 2)
+    return error("Invalid record: metadata strings layout");
+
+  unsigned NumStrings = Record[0];
+  unsigned StringsOffset = Record[1];
+  if (!NumStrings)
+    return error("Invalid record: metadata strings with no strings");
+  // An offset equal to the blob size is valid: it means that every string in
+  // the record is empty, so no character data follows the lengths.
+  if (StringsOffset > Blob.size())
+    return error("Invalid record: metadata strings corrupt offset");
+
+  StringRef Lengths = Blob.slice(0, StringsOffset);
+  SimpleBitstreamCursor R(*StreamFile);
+  R.jumpToPointer(Lengths.begin());
+
+  // Ensure that Blob doesn't get invalidated, even if this is reading from
+  // a StreamingMemoryObject with corrupt data.
+  R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
+
+  StringRef Strings = Blob.drop_front(StringsOffset);
+  do {
+    if (R.AtEndOfStream())
+      return error("Invalid record: metadata strings bad length");
+
+    unsigned Size = R.ReadVBR(6);
+    if (Strings.size() < Size)
+      return error("Invalid record: metadata strings truncated chars");
+
+    MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)),
+                             NextMetadataNo++);
+    Strings = Strings.drop_front(Size);
+  } while (--NumStrings);
+
+  return std::error_code();
+}
+
 /// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
 /// module level metadata.
 std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
@@ -1929,7 +1975,8 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
 
     // Read a record.
     Record.clear();
-    unsigned Code = Stream.readRecord(Entry.ID, Record);
+    StringRef Blob;
+    unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
     bool IsDistinct = false;
     switch (Code) {
     default: // Default behavior: ignore.
@@ -2363,7 +2410,7 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
           NextMetadataNo++);
       break;
     }
-    case bitc::METADATA_STRING: {
+    case bitc::METADATA_STRING_OLD: {
       std::string String(Record.begin(), Record.end());
 
       // Test for upgrading !llvm.loop.
@@ -2373,6 +2420,11 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
       MetadataList.assignValue(MD, NextMetadataNo++);
       break;
     }
+    case bitc::METADATA_STRINGS:
+      if (std::error_code EC =
+              parseMetadataStrings(Record, Blob, NextMetadataNo))
+        return EC;
+      break;
     case bitc::METADATA_KIND: {
       // Support older bitcode files that had METADATA_KIND records in a
       // block with METADATA_BLOCK_ID.
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8284d55cefc..80da91aa47e 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1347,31 +1347,65 @@ static void writeNamedMetadata(const Module &M, const ValueEnumerator &VE,
   }
 }
 
+static unsigned createMetadataStringsAbbrev(BitstreamWriter &Stream) {
+  BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRINGS));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of strings
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // offset to chars
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+  return Stream.EmitAbbrev(Abbv);
+}
+
+/// Write out a record for MDString.
+///
+/// All the metadata strings in a metadata block are emitted in a single
+/// record. The sizes and strings themselves are shoved into a blob.
+static void writeMetadataStrings(ArrayRef<const Metadata *> Strings,
+                                 BitstreamWriter &Stream,
+                                 SmallVectorImpl<uint64_t> &Record) {
+  if (Strings.empty())
+    return;
+
+  // Start the record with the number of strings.
+  Record.push_back(bitc::METADATA_STRINGS);
+  Record.push_back(Strings.size());
+
+  // Emit the sizes of the strings in the blob.
+  SmallString<256> Blob;
+  {
+    BitstreamWriter W(Blob);
+    for (const Metadata *MD : Strings)
+      W.EmitVBR(cast<MDString>(MD)->getLength(), 6);
+    W.FlushToWord();
+  }
+
+  // Add the offset to the strings to the record.
+  Record.push_back(Blob.size());
+
+  // Add the strings to the blob.
+  for (const Metadata *MD : Strings)
+    Blob.append(cast<MDString>(MD)->getString());
+
+  // Emit the final record.
+  Stream.EmitRecordWithBlob(createMetadataStringsAbbrev(Stream), Record, Blob);
+  Record.clear();
+}
+
 static void WriteModuleMetadata(const Module &M,
                                 const ValueEnumerator &VE,
                                 BitstreamWriter &Stream) {
-  const auto &MDs = VE.getMDs();
-  if (MDs.empty() && M.named_metadata_empty())
+  if (VE.getMDs().empty() && M.named_metadata_empty())
     return;
 
   Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
 
-  unsigned MDSAbbrev = 0;
-  if (VE.hasMDString()) {
-    // Abbrev for METADATA_STRING.
-    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
-    Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING));
-    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
-    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
-    MDSAbbrev = Stream.EmitAbbrev(Abbv);
-  }
-
   // Initialize MDNode abbreviations.
 #define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0;
 #include "llvm/IR/Metadata.def"
 
   SmallVector<uint64_t, 64> Record;
-  for (const Metadata *MD : MDs) {
+  writeMetadataStrings(VE.getMDStrings(), Stream, Record);
+  for (const Metadata *MD : VE.getNonMDStrings()) {
     if (const MDNode *N = dyn_cast<MDNode>(MD)) {
       assert(N->isResolved() && "Expected forward references to be resolved");
 
@@ -1385,17 +1419,7 @@ static void WriteModuleMetadata(const Module &M,
 #include "llvm/IR/Metadata.def"
       }
     }
-    if (const auto *MDC = dyn_cast<ConstantAsMetadata>(MD)) {
-      WriteValueAsMetadata(MDC, VE, Stream, Record);
-      continue;
-    }
-    const MDString *MDS = cast<MDString>(MD);
-    // Code: [strchar x N]
-    Record.append(MDS->bytes_begin(), MDS->bytes_end());
-
-    // Emit the finished record.
-    Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
-    Record.clear();
+    WriteValueAsMetadata(cast<ConstantAsMetadata>(MD), VE, Stream, Record);
   }
 
   writeNamedMetadata(M, VE, Stream, Record);
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 08b5e45703a..69cafb7eeee 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -280,8 +280,7 @@ static bool isIntOrIntVectorValue(const std::pair<const Value *, unsigned> &V) {
 
 ValueEnumerator::ValueEnumerator(const Module &M,
                                  bool ShouldPreserveUseListOrder)
-    : HasMDString(false),
-      ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
+    : ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
   if (ShouldPreserveUseListOrder)
     UseListOrders = predictUseListOrder(M);
 
@@ -375,6 +374,9 @@ ValueEnumerator::ValueEnumerator(const Module &M,
 
   // Optimize constant ordering.
   OptimizeConstants(FirstConstant, Values.size());
+
+  // Organize metadata ordering.
+  organizeMetadata();
 }
 
 unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
@@ -530,8 +532,8 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) {
     EnumerateMDNodeOperands(N);
   else if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
     EnumerateValue(C->getValue());
-
-  HasMDString |= isa<MDString>(MD);
+  else
+    ++NumMDStrings;
 
   // Replace the dummy ID inserted above with the correct one. MetadataMap may
   // have changed by inserting operands, so we need a fresh lookup here.
@@ -557,6 +559,19 @@ void ValueEnumerator::EnumerateFunctionLocalMetadata(
   FunctionLocalMDs.push_back(Local);
 }
 
+void ValueEnumerator::organizeMetadata() {
+  if (!NumMDStrings)
+    return;
+
+  // Put the strings first.
+  std::stable_partition(MDs.begin(), MDs.end(),
+                        [](const Metadata *MD) { return isa<MDString>(MD); });
+
+  // Renumber.
+  for (unsigned I = 0, E = MDs.size(); I != E; ++I)
+    MetadataMap[MDs[I]] = I + 1;
+}
+
 void ValueEnumerator::EnumerateValue(const Value *V) {
   assert(!V->getType()->isVoidTy() && "Can't insert void values!");
   assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 7665210d014..fd09a695a99 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -66,7 +66,7 @@ private:
   SmallVector<const LocalAsMetadata *, 8> FunctionLocalMDs;
   typedef DenseMap<const Metadata *, unsigned> MetadataMapType;
   MetadataMapType MetadataMap;
-  bool HasMDString;
+  unsigned NumMDStrings = 0;
   bool ShouldPreserveUseListOrder;
 
   typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType;
@@ -121,8 +121,6 @@ public:
   }
   unsigned numMDs() const { return MDs.size(); }
 
-  bool hasMDString() const { return HasMDString; }
-
   bool shouldPreserveUseListOrder() const { return ShouldPreserveUseListOrder; }
 
   unsigned getTypeID(Type *T) const {
@@ -157,9 +155,16 @@ public:
 
   const ValueList &getValues() const { return Values; }
   const std::vector<const Metadata *> &getMDs() const { return MDs; }
+  ArrayRef<const Metadata *> getMDStrings() const {
+    return makeArrayRef(MDs).slice(0, NumMDStrings);
+  }
+  ArrayRef<const Metadata *> getNonMDStrings() const {
+    return makeArrayRef(MDs).slice(NumMDStrings);
+  }
   const SmallVectorImpl<const LocalAsMetadata *> &getFunctionLocalMDs() const {
     return FunctionLocalMDs;
   }
+
   const TypeList &getTypes() const { return Types; }
   const std::vector<const BasicBlock*> &getBasicBlocks() const {
     return BasicBlocks;
@@ -189,6 +194,10 @@ public:
 private:
   void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
 
+  // Reorder the reachable metadata. This is not just an optimization, but is
+  // mandatory for emitting MDString correctly.
+  void organizeMetadata();
+
   void EnumerateMDNodeOperands(const MDNode *N);
   void EnumerateMetadata(const Metadata *MD);
   void EnumerateFunctionLocalMetadata(const LocalAsMetadata *Local);
diff --git a/test/Bitcode/metadata-strings.ll b/test/Bitcode/metadata-strings.ll
new file mode 100644
index 00000000000..9c48db5b983
--- /dev/null
+++ b/test/Bitcode/metadata-strings.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s
+
+!named = !{!0}
+
+; CHECK:      <METADATA_BLOCK
+; CHECK-NEXT: <STRINGS
+; CHECK-SAME: /> num-strings = 3 {
+; CHECK-NEXT:   'a'
+; CHECK-NEXT:   'b'
+; CHECK-NEXT:   'c'
+; CHECK-NEXT: }
+!0 = !{!"a", !"b", !"c"}
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 32179c168de..6645045db8f 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -312,7 +312,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
   case bitc::METADATA_BLOCK_ID:
     switch(CodeID) {
     default:return nullptr;
-      STRINGIFY_CODE(METADATA, STRING)
+      STRINGIFY_CODE(METADATA, STRING_OLD)
+      STRINGIFY_CODE(METADATA, STRINGS)
       STRINGIFY_CODE(METADATA, NAME)
       STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
       STRINGIFY_CODE(METADATA, NODE)
@@ -404,6 +405,65 @@ static bool Error(const Twine &Err) {
   return true;
 }
 
+static bool decodeMetadataStringsBlob(BitstreamReader &Reader, StringRef Indent,
+                                      ArrayRef<uint64_t> Record,
+                                      StringRef Blob) {
+  if (Blob.empty())
+    return true;
+
+  if (Record.size() != 2)
+    return true;
+
+  unsigned NumStrings = Record[0];
+  unsigned StringsOffset = Record[1];
+  // Validate the layout before printing anything. A zero count would wrap
+  // the do/while counter below, and an offset past the end of the blob would
+  // place the cursor's byte limit beyond the end of the blob.
+  if (!NumStrings)
+    return Error("no strings");
+  if (StringsOffset > Blob.size())
+    return Error("corrupt offset");
+
+  outs() << " num-strings = " << NumStrings << " {\n";
+
+  StringRef Lengths = Blob.slice(0, StringsOffset);
+  SimpleBitstreamCursor R(Reader);
+  R.jumpToPointer(Lengths.begin());
+
+  // Ensure that Blob doesn't get invalidated, even if this is reading from a
+  // StreamingMemoryObject with corrupt data.
+  R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
+
+  StringRef Strings = Blob.drop_front(StringsOffset);
+  do {
+    if (R.AtEndOfStream())
+      return Error("bad length");
+
+    unsigned Size = R.ReadVBR(6);
+    if (Strings.size() < Size)
+      return Error("truncated chars");
+
+    outs() << Indent << " '";
+    outs().write_escaped(Strings.slice(0, Size), /*hex=*/true);
+    outs() << "'\n";
+    Strings = Strings.drop_front(Size);
+  } while (--NumStrings);
+
+  outs() << Indent << " }";
+  return false;
+}
+
+static bool decodeBlob(unsigned Code, unsigned BlockID, BitstreamReader &Reader,
+                       StringRef Indent, ArrayRef<uint64_t> Record,
+                       StringRef Blob) {
+  if (BlockID != bitc::METADATA_BLOCK_ID)
+    return true;
+  if (Code != bitc::METADATA_STRINGS)
+    return true;
+
+  return decodeMetadataStringsBlob(Reader, Indent, Record, Blob);
+}
+
 /// ParseBlock - Read a block, updating statistics, etc.
 static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
                        unsigned IndentLevel, CurStreamTypeType CurStreamType) {
@@ -557,7 +617,8 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
           }
         }
 
-        if (Blob.data()) {
+        if (Blob.data() && decodeBlob(Code, BlockID, *Stream.getBitStreamReader(),
+                                      Indent, Record, Blob)) {
           outs() << " blob data = ";
           if (ShowBinaryBlobs) {
             outs() << "'";