MinidumpYAML: Add support for ModuleList stream

Summary:
This patch adds support for yaml (de)serialization of the minidump
ModuleList stream. It's a fairly straightforward application of the
existing patterns to the ModuleList structures defined in previous
patches.

One thing which may be interesting to call out explicitly is the
addition of "new" allocation functions to the helper BlobAllocator
class. The reason for this is that there was an emerging pattern of
needing to allocate space for entities which do not have a suitable
lifetime for use with the existing allocation functions. A typical
example of that was the "size" of various lists, which is only available
as a temporary returned by the .size() method of some container. For
these cases, one can use the new set of allocation functions, which
will take a temporary object, and store it in an allocator-managed
buffer until it is written to disk.

Reviewers: amccarth, jhenderson, clayborg, zturner

Subscribers: lldb-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60405

llvm-svn: 358672
This commit is contained in:
Pavel Labath 2019-04-18 14:57:31 +00:00
parent dd0f5b8dc8
commit bbcc68ebde
5 changed files with 222 additions and 12 deletions

View File

@ -141,6 +141,10 @@ struct VSFixedFileInfo {
};
static_assert(sizeof(VSFixedFileInfo) == 52, "");
/// Equality for VSFixedFileInfo: two records are equal exactly when their
/// object representations compare bytewise identical.
inline bool operator==(const VSFixedFileInfo &LHS, const VSFixedFileInfo &RHS) {
  const int ByteOrder = memcmp(&LHS, &RHS, sizeof(VSFixedFileInfo));
  return ByteOrder == 0;
}
struct Module {
support::ulittle64_t BaseOfImage;
support::ulittle32_t SizeOfImage;

View File

@ -43,6 +43,13 @@ public:
/// file does not contain a stream of this type.
Optional<ArrayRef<uint8_t>> getRawStream(minidump::StreamType Type) const;
/// Fetches the bytes that \p Desc refers to. The descriptor's RVA and DataSize
/// are passed to getDataSlice together with the file data; an error is
/// returned if the descriptor points outside of the minidump file.
Expected<ArrayRef<uint8_t>>
getRawData(minidump::LocationDescriptor Desc) const {
  const size_t Offset = Desc.RVA;
  const size_t Length = Desc.DataSize;
  return getDataSlice(getData(), Offset, Length);
}
/// Returns the minidump string at the given offset. An error is returned if
/// we fail to parse the string, or the string is invalid UTF16.
Expected<std::string> getString(size_t Offset) const;

View File

@ -26,6 +26,7 @@ namespace MinidumpYAML {
/// from Types to Kinds is fixed and given by the static getKind function.
struct Stream {
enum class StreamKind {
ModuleList,
RawContent,
SystemInfo,
TextContent,
@ -49,6 +50,30 @@ struct Stream {
const object::MinidumpFile &File);
};
/// A stream representing the list of modules loaded in the process. On disk, it
/// is represented as a sequence of minidump::Module structures. These contain
/// pointers to other data structures, like the module's name and CodeView
/// record. In memory, we represent these as the ParsedModule struct, which
/// groups minidump::Module with all of its dependent structures in a single
/// entity.
struct ModuleListStream : public Stream {
/// One module entry together with the out-of-line data it refers to.
struct ParsedModule {
/// The fixed-size module record itself.
minidump::Module Module;
/// The module's name (the "Module Name" key in YAML).
std::string Name;
/// Contents of the CodeView record (the "CodeView Record" key in YAML).
yaml::BinaryRef CvRecord;
/// Contents of the misc record (the optional "Misc Record" key in YAML).
yaml::BinaryRef MiscRecord;
};
/// All modules of this stream, in stream order.
std::vector<ParsedModule> Modules;
/// Creates a ModuleList stream holding \p Modules (empty by default).
ModuleListStream(std::vector<ParsedModule> Modules = {})
: Stream(StreamKind::ModuleList, minidump::StreamType::ModuleList),
Modules(std::move(Modules)) {}
/// LLVM-style RTTI support: true iff \p S has the ModuleList kind.
static bool classof(const Stream *S) {
return S->Kind == StreamKind::ModuleList;
}
};
/// A minidump stream represented as a sequence of hex bytes. This is used as a
/// fallback when no other stream kind is suitable.
struct RawContentStream : public Stream {
@ -162,8 +187,12 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::minidump::StreamType)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::ArmInfo)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::OtherInfo)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::CPUInfo::X86Info)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::minidump::VSFixedFileInfo)
LLVM_YAML_DECLARE_MAPPING_TRAITS(
llvm::MinidumpYAML::ModuleListStream::ParsedModule)
LLVM_YAML_IS_SEQUENCE_VECTOR(std::unique_ptr<llvm::MinidumpYAML::Stream>)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MinidumpYAML::ModuleListStream::ParsedModule)
LLVM_YAML_DECLARE_MAPPING_TRAITS(llvm::MinidumpYAML::Object)

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/MinidumpYAML.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ConvertUTF.h"
using namespace llvm;
@ -14,6 +15,16 @@ using namespace llvm::MinidumpYAML;
using namespace llvm::minidump;
namespace {
/// A helper class to manage the placement of various structures into the final
/// minidump binary. Space for objects can be allocated via various allocate***
/// methods, while the final minidump file is written by calling the writeTo
/// method. The plain versions of allocation functions take a reference to the
/// data which is to be written (and hence the data must be available until
/// writeTo is called), while the "New" versions allocate the data in an
/// allocator-managed buffer, which is available until the allocator object is
/// destroyed. For both kinds of functions, it is possible to modify the
/// data for which the space has been "allocated" until the final writeTo call.
/// This is useful for "linking" the allocated structures via their offsets.
class BlobAllocator {
public:
size_t tell() const { return NextOffset; }
@ -31,15 +42,31 @@ public:
Data.size(), [Data](raw_ostream &OS) { OS << toStringRef(Data); });
}
/// Reserves Data.binary_size() bytes and registers a callback that emits
/// \p Data in binary form. The BinaryRef is captured by value, so whatever it
/// refers to must stay available until the callback runs. Returns the result
/// of allocateCallback, which callers use as the RVA of the data.
size_t allocateBytes(yaml::BinaryRef Data) {
  const size_t NumBytes = Data.binary_size();
  auto EmitBinary = [Data](raw_ostream &OS) { Data.writeAsBinary(OS); };
  return allocateCallback(NumBytes, EmitBinary);
}
/// Allocates space for the elements of \p Data by viewing them as a flat run
/// of sizeof(T) * Data.size() bytes and forwarding to the byte-array overload
/// of allocateBytes. The elements are not copied here.
template <typename T> size_t allocateArray(ArrayRef<T> Data) {
  const uint8_t *FirstByte = reinterpret_cast<const uint8_t *>(Data.data());
  const size_t ByteCount = sizeof(T) * Data.size();
  return allocateBytes({FirstByte, ByteCount});
}
/// Copies the elements of \p Range into an array of T obtained from the
/// Temporaries allocator and allocates file space for that copy. Returns the
/// allocateArray result paired with a mutable view of the copied elements.
/// Defined out of line after the class.
template <typename T, typename RangeType>
std::pair<size_t, MutableArrayRef<T>>
allocateNewArray(const iterator_range<RangeType> &Range);
/// Allocates space for \p Data by wrapping it with makeArrayRef and handing
/// the result to allocateArray. \p Data is referenced, not copied.
template <typename T> size_t allocateObject(const T &Data) {
  auto AsArray = makeArrayRef(Data);
  return allocateArray(AsArray);
}
template <typename T, typename... Types>
std::pair<size_t, T *> allocateNewObject(Types &&... Args) {
T *Object = new (Temporaries.Allocate<T>()) T(std::forward<Types>(Args)...);
return {allocateObject(*Object), Object};
}
size_t allocateString(StringRef Str);
void writeTo(raw_ostream &OS) const;
@ -47,28 +74,33 @@ public:
private:
size_t NextOffset = 0;
BumpPtrAllocator Temporaries;
std::vector<std::function<void(raw_ostream &)>> Callbacks;
};
} // namespace
/// Copy-constructs the elements of \p Range into a T array obtained from the
/// Temporaries allocator, then allocates file space for that array. Returns
/// the allocateArray result together with a mutable view of the copy.
template <typename T, typename RangeType>
std::pair<size_t, MutableArrayRef<T>>
BlobAllocator::allocateNewArray(const iterator_range<RangeType> &Range) {
  const auto First = Range.begin();
  const auto Last = Range.end();
  const size_t Count = std::distance(First, Last);

  T *Storage = Temporaries.Allocate<T>(Count);
  MutableArrayRef<T> Copy(Storage, Count);
  std::uninitialized_copy(First, Last, Copy.begin());
  return {allocateArray(Copy), Copy};
}
/// Allocates space for \p Str converted to UTF-16: first a 32-bit
/// little-endian length field (twice the number of UTF-16 code units, not
/// counting the terminator), then the null-terminated code units as 16-bit
/// little-endian values. Both pieces are stored in allocator-managed
/// temporaries. Returns the value obtained when allocating the length field,
/// which callers use as the string's RVA.
size_t BlobAllocator::allocateString(StringRef Str) {
  SmallVector<UTF16, 32> WStr;
  bool OK = convertUTF8ToUTF16String(Str, WStr);
  assert(OK && "Invalid UTF8 in Str?");
  (void)OK;

  // The utf16 string is null-terminated, but the terminator is not counted in
  // the string size.
  WStr.push_back(0);
  size_t Result =
      allocateNewObject<support::ulittle32_t>(2 * (WStr.size() - 1)).first;
  allocateNewArray<support::ulittle16_t>(make_range(WStr.begin(), WStr.end()));
  return Result;
}
void BlobAllocator::writeTo(raw_ostream &OS) const {
@ -136,6 +168,8 @@ Stream::~Stream() = default;
Stream::StreamKind Stream::getKind(StreamType Type) {
switch (Type) {
case StreamType::ModuleList:
return StreamKind::ModuleList;
case StreamType::SystemInfo:
return StreamKind::SystemInfo;
case StreamType::LinuxCPUInfo:
@ -154,6 +188,8 @@ Stream::StreamKind Stream::getKind(StreamType Type) {
std::unique_ptr<Stream> Stream::create(StreamType Type) {
StreamKind Kind = getKind(Type);
switch (Kind) {
case StreamKind::ModuleList:
return llvm::make_unique<ModuleListStream>();
case StreamKind::RawContent:
return llvm::make_unique<RawContentStream>(Type);
case StreamKind::SystemInfo:
@ -270,6 +306,38 @@ void yaml::MappingTraits<CPUInfo::X86Info>::mapping(IO &IO,
mapOptionalHex(IO, "AMD Extended Features", Info.AMDExtendedFeatures, 0);
}
/// Maps each VSFixedFileInfo field as an optional hex value with a default of
/// 0. Keys are visited in the order of the table below.
void yaml::MappingTraits<VSFixedFileInfo>::mapping(IO &IO,
                                                   VSFixedFileInfo &Info) {
  // A single pointer-to-member type is used for every field; this only
  // compiles if all of the listed fields share that type.
  struct FieldEntry {
    const char *Key;
    support::ulittle32_t VSFixedFileInfo::*Member;
  };
  static const FieldEntry Fields[] = {
      {"Signature", &VSFixedFileInfo::Signature},
      {"Struct Version", &VSFixedFileInfo::StructVersion},
      {"File Version High", &VSFixedFileInfo::FileVersionHigh},
      {"File Version Low", &VSFixedFileInfo::FileVersionLow},
      {"Product Version High", &VSFixedFileInfo::ProductVersionHigh},
      {"Product Version Low", &VSFixedFileInfo::ProductVersionLow},
      {"File Flags Mask", &VSFixedFileInfo::FileFlagsMask},
      {"File Flags", &VSFixedFileInfo::FileFlags},
      {"File OS", &VSFixedFileInfo::FileOS},
      {"File Type", &VSFixedFileInfo::FileType},
      {"File Subtype", &VSFixedFileInfo::FileSubtype},
      {"File Date High", &VSFixedFileInfo::FileDateHigh},
      {"File Date Low", &VSFixedFileInfo::FileDateLow},
  };
  for (const FieldEntry &Field : Fields)
    mapOptionalHex(IO, Field.Key, Info.*Field.Member, 0);
}
/// YAML mapping for one parsed module. "Base of Image", "Size of Image",
/// "Module Name" and "CodeView Record" are required; every other key is
/// optional and falls back to a zero/empty default.
void yaml::MappingTraits<ModuleListStream::ParsedModule>::mapping(
    IO &IO, ModuleListStream::ParsedModule &M) {
  auto &Record = M.Module;

  mapRequiredHex(IO, "Base of Image", Record.BaseOfImage);
  mapRequiredHex(IO, "Size of Image", Record.SizeOfImage);
  mapOptionalHex(IO, "Checksum", Record.Checksum, 0);

  const support::ulittle32_t NoTimestamp(0);
  IO.mapOptional("Time Date Stamp", Record.TimeDateStamp, NoTimestamp);

  IO.mapRequired("Module Name", M.Name);

  const VSFixedFileInfo NoVersionInfo = VSFixedFileInfo();
  IO.mapOptional("Version Info", Record.VersionInfo, NoVersionInfo);

  IO.mapRequired("CodeView Record", M.CvRecord);

  const yaml::BinaryRef NoMiscRecord;
  IO.mapOptional("Misc Record", M.MiscRecord, NoMiscRecord);

  mapOptionalHex(IO, "Reserved0", Record.Reserved0, 0);
  mapOptionalHex(IO, "Reserved1", Record.Reserved1, 0);
}
static void streamMapping(yaml::IO &IO, RawContentStream &Stream) {
IO.mapOptional("Content", Stream.Content);
IO.mapOptional("Size", Stream.Size, Stream.Content.binary_size());
@ -281,6 +349,10 @@ static StringRef streamValidate(RawContentStream &Stream) {
return "";
}
/// YAML mapping for a ModuleList stream: its only key is the required
/// "Modules" sequence.
static void streamMapping(yaml::IO &IO, ModuleListStream &Stream) {
  auto &ModuleEntries = Stream.Modules;
  IO.mapRequired("Modules", ModuleEntries);
}
static void streamMapping(yaml::IO &IO, SystemInfoStream &Stream) {
SystemInfo &Info = Stream.Info;
IO.mapRequired("Processor Arch", Info.ProcessorArch);
@ -324,6 +396,9 @@ void yaml::MappingTraits<std::unique_ptr<Stream>>::mapping(
if (!IO.outputting())
S = MinidumpYAML::Stream::create(Type);
switch (S->Kind) {
case MinidumpYAML::Stream::StreamKind::ModuleList:
streamMapping(IO, llvm::cast<ModuleListStream>(*S));
break;
case MinidumpYAML::Stream::StreamKind::RawContent:
streamMapping(IO, llvm::cast<RawContentStream>(*S));
break;
@ -341,6 +416,7 @@ StringRef yaml::MappingTraits<std::unique_ptr<Stream>>::validate(
switch (S->Kind) {
case MinidumpYAML::Stream::StreamKind::RawContent:
return streamValidate(cast<RawContentStream>(*S));
case MinidumpYAML::Stream::StreamKind::ModuleList:
case MinidumpYAML::Stream::StreamKind::SystemInfo:
case MinidumpYAML::Stream::StreamKind::TextContent:
return "";
@ -362,6 +438,26 @@ static Directory layout(BlobAllocator &File, Stream &S) {
Result.Location.RVA = File.tell();
Optional<size_t> DataEnd;
switch (S.Kind) {
case Stream::StreamKind::ModuleList: {
ModuleListStream &List = cast<ModuleListStream>(S);
File.allocateNewObject<support::ulittle32_t>(List.Modules.size());
for (ModuleListStream::ParsedModule &M : List.Modules)
File.allocateObject(M.Module);
// Module names and CodeView/Misc records are not a part of the stream.
DataEnd = File.tell();
for (ModuleListStream::ParsedModule &M : List.Modules) {
M.Module.ModuleNameRVA = File.allocateString(M.Name);
M.Module.CvRecord.RVA = File.allocateBytes(M.CvRecord);
M.Module.CvRecord.DataSize = M.CvRecord.binary_size();
M.Module.MiscRecord.RVA = File.allocateBytes(M.MiscRecord);
M.Module.MiscRecord.DataSize = M.MiscRecord.binary_size();
}
break;
}
case Stream::StreamKind::RawContent: {
RawContentStream &Raw = cast<RawContentStream>(S);
File.allocateCallback(Raw.Size, [&Raw](raw_ostream &OS) {
@ -420,6 +516,26 @@ Expected<std::unique_ptr<Stream>>
Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
StreamKind Kind = getKind(StreamDesc.Type);
switch (Kind) {
case StreamKind::ModuleList: {
auto ExpectedList = File.getModuleList();
if (!ExpectedList)
return ExpectedList.takeError();
std::vector<ModuleListStream::ParsedModule> Modules;
for (const Module &M : *ExpectedList) {
auto ExpectedName = File.getString(M.ModuleNameRVA);
if (!ExpectedName)
return ExpectedName.takeError();
auto ExpectedCv = File.getRawData(M.CvRecord);
if (!ExpectedCv)
return ExpectedCv.takeError();
auto ExpectedMisc = File.getRawData(M.MiscRecord);
if (!ExpectedMisc)
return ExpectedMisc.takeError();
Modules.push_back(
{M, std::move(*ExpectedName), *ExpectedCv, *ExpectedMisc});
}
return make_unique<ModuleListStream>(std::move(Modules));
}
case StreamKind::RawContent:
return llvm::make_unique<RawContentStream>(StreamDesc.Type,
File.getRawStream(StreamDesc));

View File

@ -15,6 +15,33 @@ Streams:
400d9000-400db000 r-xp 00000000 b3:04 227 /system/bin/app_process
400db000-400dc000 r--p 00001000 b3:04 227 /system/bin/app_process
- Type: ModuleList
Modules:
- Base of Image: 0x0001020304050607
Size of Image: 0x08090A0B
Checksum: 0x0C0D0E0F
Time Date Stamp: 47
Module Name: a.out
Version Info:
Signature: 0x10111213
Struct Version: 0x14151617
File Version High: 0x18191A1B
File Version Low: 0x1C1D1E1F
Product Version High: 0x20212223
Product Version Low: 0x24252627
File Flags Mask: 0x28292A2B
File Flags: 0x2C2D2E2F
File OS: 0x30313233
File Type: 0x34353637
File Subtype: 0x38393A3B
File Date High: 0x3C3D3E3F
File Date Low: 0x40414243
CodeView Record: '44454647'
Misc Record: 48494A4B
- Base of Image: 0x4C4D4E4F50515253
Size of Image: 0x54555657
Module Name: libb.so
CodeView Record: 58595A5B
...
# CHECK: --- !minidump
@ -32,4 +59,31 @@ Streams:
# CHECK-NEXT: 400d9000-400db000 r-xp 00000000 b3:04 227 /system/bin/app_process
# CHECK-NEXT: 400db000-400dc000 r--p 00001000 b3:04 227 /system/bin/app_process
# CHECK-EMPTY:
# CHECK-NEXT: - Type: ModuleList
# CHECK-NEXT: Modules:
# CHECK-NEXT: - Base of Image: 0x0001020304050607
# CHECK-NEXT: Size of Image: 0x08090A0B
# CHECK-NEXT: Checksum: 0x0C0D0E0F
# CHECK-NEXT: Time Date Stamp: 47
# CHECK-NEXT: Module Name: a.out
# CHECK-NEXT: Version Info:
# CHECK-NEXT: Signature: 0x10111213
# CHECK-NEXT: Struct Version: 0x14151617
# CHECK-NEXT: File Version High: 0x18191A1B
# CHECK-NEXT: File Version Low: 0x1C1D1E1F
# CHECK-NEXT: Product Version High: 0x20212223
# CHECK-NEXT: Product Version Low: 0x24252627
# CHECK-NEXT: File Flags Mask: 0x28292A2B
# CHECK-NEXT: File Flags: 0x2C2D2E2F
# CHECK-NEXT: File OS: 0x30313233
# CHECK-NEXT: File Type: 0x34353637
# CHECK-NEXT: File Subtype: 0x38393A3B
# CHECK-NEXT: File Date High: 0x3C3D3E3F
# CHECK-NEXT: File Date Low: 0x40414243
# CHECK-NEXT: CodeView Record: '44454647'
# CHECK-NEXT: Misc Record: 48494A4B
# CHECK-NEXT: - Base of Image: 0x4C4D4E4F50515253
# CHECK-NEXT: Size of Image: 0x54555657
# CHECK-NEXT: Module Name: libb.so
# CHECK-NEXT: CodeView Record: 58595A5B
# CHECK-NEXT: ...