Minidump: Add support for reading/writing strings

Summary:
Strings in minidump files are stored as a 32-bit length field, giving
the length of the string in *bytes*, which is followed by the
appropriate number of UTF16 code units. The string is also supposed to
be null-terminated, and the null-terminator is not a part of the length
field. This patch:
- adds support for reading these strings out of the minidump file (this
  implementation does not depend on proper null-termination)
- adds support for writing them to a minidump file
- using the previous two pieces implements proper (de)serialization of
  the CSDVersion field of the SystemInfo stream. Previously, this was
  only read/written as hex, and no attempt was made to access the
  referenced string -- now this string is read and written correctly.

The changes are tested via yaml2obj|obj2yaml round-trip as well as a
unit test which checks the corner cases of the string deserialization
logic.

Reviewers: jhenderson, zturner, clayborg

Subscribers: llvm-commits, aprantl, markmentovai, amccarth, lldb-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D59775

llvm-svn: 357749
This commit is contained in:
Pavel Labath 2019-04-05 08:06:26 +00:00
parent 98edcd9b9c
commit 51d9fa0a22
7 changed files with 114 additions and 11 deletions

View File

@ -43,6 +43,10 @@ public:
/// file does not contain a stream of this type.
Optional<ArrayRef<uint8_t>> getRawStream(minidump::StreamType Type) const;
/// Returns the minidump string at the given offset. An error is returned if
/// we fail to parse the string, or the string is invalid UTF16.
Expected<std::string> getString(size_t Offset) const;
/// Returns the contents of the SystemInfo stream, cast to the appropriate
/// type. An error is returned if the file does not contain this stream, or
/// the stream is smaller than the size of the SystemInfo structure. The

View File

@ -67,10 +67,12 @@ struct RawContentStream : public Stream {
/// SystemInfo minidump stream.
struct SystemInfoStream : public Stream {
minidump::SystemInfo Info;
std::string CSDVersion;
explicit SystemInfoStream(const minidump::SystemInfo &Info)
explicit SystemInfoStream(const minidump::SystemInfo &Info,
std::string CSDVersion)
: Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo),
Info(Info) {}
Info(Info), CSDVersion(std::move(CSDVersion)) {}
SystemInfoStream()
: Stream(StreamKind::SystemInfo, minidump::StreamType::SystemInfo) {

View File

@ -8,6 +8,7 @@
#include "llvm/Object/Minidump.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/ConvertUTF.h"
using namespace llvm;
using namespace llvm::object;
@ -21,6 +22,33 @@ MinidumpFile::getRawStream(minidump::StreamType Type) const {
return None;
}
Expected<std::string> MinidumpFile::getString(size_t Offset) const {
// Minidump strings consist of a 32-bit length field, which gives the size of
// the string in *bytes*. This is followed by the actual string encoded in
// UTF16.
auto ExpectedSize =
getDataSliceAs<support::ulittle32_t>(getData(), Offset, 1);
if (!ExpectedSize)
return ExpectedSize.takeError();
size_t Size = (*ExpectedSize)[0];
if (Size % 2 != 0)
return createError("String size not even");
Size /= 2;
if (Size == 0)
return "";
Offset += sizeof(support::ulittle32_t);
auto ExpectedData = getDataSliceAs<UTF16>(getData(), Offset, Size);
if (!ExpectedData)
return ExpectedData.takeError();
std::string Result;
if (!convertUTF16ToUTF8String(*ExpectedData, Result))
return createError("String decoding failed");
return Result;
}
Expected<ArrayRef<uint8_t>>
MinidumpFile::getDataSlice(ArrayRef<uint8_t> Data, size_t Offset, size_t Size) {
// Check for overflow.

View File

@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ObjectYAML/MinidumpYAML.h"
#include "llvm/Support/ConvertUTF.h"
using namespace llvm;
using namespace llvm::MinidumpYAML;
@ -39,6 +40,8 @@ public:
return allocateArray(makeArrayRef(Data));
}
size_t allocateString(StringRef Str);
void writeTo(raw_ostream &OS) const;
private:
@ -48,6 +51,26 @@ private:
};
} // namespace
size_t BlobAllocator::allocateString(StringRef Str) {
SmallVector<UTF16, 32> WStr;
bool OK = convertUTF8ToUTF16String(Str, WStr);
assert(OK && "Invalid UTF8 in Str?");
(void)OK;
SmallVector<support::ulittle16_t, 32> EndianStr(WStr.size() + 1,
support::ulittle16_t());
copy(WStr, EndianStr.begin());
return allocateCallback(
sizeof(uint32_t) + EndianStr.size() * sizeof(support::ulittle16_t),
[EndianStr](raw_ostream &OS) {
// Length does not include the null-terminator.
support::ulittle32_t Length(2 * (EndianStr.size() - 1));
OS.write(reinterpret_cast<const char *>(&Length), sizeof(Length));
OS.write(reinterpret_cast<const char *>(EndianStr.begin()),
sizeof(support::ulittle16_t) * EndianStr.size());
});
}
void BlobAllocator::writeTo(raw_ostream &OS) const {
size_t BeginOffset = OS.tell();
for (const auto &Callback : Callbacks)
@ -269,7 +292,7 @@ static void streamMapping(yaml::IO &IO, SystemInfoStream &Stream) {
mapOptional(IO, "Minor Version", Info.MinorVersion, 0);
mapOptional(IO, "Build Number", Info.BuildNumber, 0);
IO.mapRequired("Platform ID", Info.PlatformId);
mapOptionalHex(IO, "CSD Version RVA", Info.CSDVersionRVA, 0);
IO.mapOptional("CSD Version", Stream.CSDVersion, "");
mapOptionalHex(IO, "Suite Mask", Info.SuiteMask, 0);
mapOptionalHex(IO, "Reserved", Info.Reserved, 0);
switch (static_cast<ProcessorArchitecture>(Info.ProcessorArch)) {
@ -337,6 +360,7 @@ static Directory layout(BlobAllocator &File, Stream &S) {
Directory Result;
Result.Type = S.Type;
Result.Location.RVA = File.tell();
Optional<size_t> DataEnd;
switch (S.Kind) {
case Stream::StreamKind::RawContent: {
RawContentStream &Raw = cast<RawContentStream>(S);
@ -347,14 +371,22 @@ static Directory layout(BlobAllocator &File, Stream &S) {
});
break;
}
case Stream::StreamKind::SystemInfo:
File.allocateObject(cast<SystemInfoStream>(S).Info);
case Stream::StreamKind::SystemInfo: {
SystemInfoStream &SystemInfo = cast<SystemInfoStream>(S);
File.allocateObject(SystemInfo.Info);
// The CSD string is not a part of the stream.
DataEnd = File.tell();
SystemInfo.Info.CSDVersionRVA = File.allocateString(SystemInfo.CSDVersion);
break;
}
case Stream::StreamKind::TextContent:
File.allocateArray(arrayRefFromStringRef(cast<TextContentStream>(S).Text));
break;
}
Result.Location.DataSize = File.tell() - Result.Location.RVA;
// If DataEnd is not set, we assume everything we generated is a part of the
// stream.
Result.Location.DataSize =
DataEnd.getValueOr(File.tell()) - Result.Location.RVA;
return Result;
}
@ -395,7 +427,11 @@ Stream::create(const Directory &StreamDesc, const object::MinidumpFile &File) {
auto ExpectedInfo = File.getSystemInfo();
if (!ExpectedInfo)
return ExpectedInfo.takeError();
return make_unique<SystemInfoStream>(*ExpectedInfo);
auto ExpectedCSDVersion = File.getString(ExpectedInfo->CSDVersionRVA);
if (!ExpectedCSDVersion)
return ExpectedInfo.takeError();
return make_unique<SystemInfoStream>(*ExpectedInfo,
std::move(*ExpectedCSDVersion));
}
case StreamKind::TextContent:
return make_unique<TextContentStream>(

View File

@ -5,7 +5,7 @@ Streams:
- Type: SystemInfo
Processor Arch: ARM64
Platform ID: Linux
CSD Version RVA: 0x01020304
CSD Version: Linux 3.13.0-91-generic
CPU:
CPUID: 0x05060708
- Type: LinuxAuxv
@ -22,7 +22,7 @@ Streams:
# CHECK-NEXT: - Type: SystemInfo
# CHECK-NEXT: Processor Arch: ARM64
# CHECK-NEXT: Platform ID: Linux
# CHECK-NEXT: CSD Version RVA: 0x01020304
# CHECK-NEXT: CSD Version: Linux 3.13.0-91-generic
# CHECK-NEXT: CPU:
# CHECK-NEXT: CPUID: 0x05060708
# CHECK-NEXT: - Type: LinuxAuxv

View File

@ -249,3 +249,38 @@ TEST(MinidumpFile, getSystemInfo) {
EXPECT_EQ(0x08070605u, Info.CPU.X86.FeatureInfo);
EXPECT_EQ(0x02010009u, Info.CPU.X86.AMDExtendedFeatures);
}
TEST(MinidumpFile, getString) {
std::vector<uint8_t> ManyStrings{
// Header
'M', 'D', 'M', 'P', 0x93, 0xa7, 0, 0, // Signature, Version
2, 0, 0, 0, // NumberOfStreams,
0x20, 0, 0, 0, // StreamDirectoryRVA
0, 1, 2, 3, 4, 5, 6, 7, // Checksum, TimeDateStamp
8, 9, 0, 1, 2, 3, 4, 5, // Flags
// Stream Directory
0, 0, 0, 0, 0, 0, 0, 0, // Type, DataSize,
0x20, 0, 0, 0, // RVA
1, 0, 0, 0, 0, 0, // String1 - odd length
0, 0, 1, 0, 0, 0, // String2 - too long
2, 0, 0, 0, 0, 0xd8, // String3 - invalid utf16
0, 0, 0, 0, 0, 0, // String4 - ""
2, 0, 0, 0, 'a', 0, // String5 - "a"
0, // Mis-align next string
2, 0, 0, 0, 'a', 0, // String6 - "a"
};
auto ExpectedFile = create(ManyStrings);
ASSERT_THAT_EXPECTED(ExpectedFile, Succeeded());
const MinidumpFile &File = **ExpectedFile;
EXPECT_THAT_EXPECTED(File.getString(44), Failed<BinaryError>());
EXPECT_THAT_EXPECTED(File.getString(50), Failed<BinaryError>());
EXPECT_THAT_EXPECTED(File.getString(56), Failed<BinaryError>());
EXPECT_THAT_EXPECTED(File.getString(62), HasValue(""));
EXPECT_THAT_EXPECTED(File.getString(68), HasValue("a"));
EXPECT_THAT_EXPECTED(File.getString(75), HasValue("a"));
// Check the case when the size field does not fit into the remaining data.
EXPECT_THAT_EXPECTED(File.getString(ManyStrings.size() - 2),
Failed<BinaryError>());
}

View File

@ -33,7 +33,6 @@ Streams:
- Type: SystemInfo
Processor Arch: ARM64
Platform ID: Linux
CSD Version RVA: 0x01020304
CPU:
CPUID: 0x05060708
- Type: LinuxMaps
@ -54,7 +53,6 @@ Streams:
const SystemInfo &SysInfo = *ExpectedSysInfo;
EXPECT_EQ(ProcessorArchitecture::ARM64, SysInfo.ProcessorArch);
EXPECT_EQ(OSPlatform::Linux, SysInfo.PlatformId);
EXPECT_EQ(0x01020304u, SysInfo.CSDVersionRVA);
EXPECT_EQ(0x05060708u, SysInfo.CPU.Arm.CPUID);
EXPECT_EQ(StreamType::LinuxMaps, File.streams()[1].Type);