llvm-pdbdump: Fix several smaller issues with injected source compression handling

- getCompression() used to return a PDB_SourceCompression even though
  the docs for IDiaInjectedSource are explicit about the return value
  being compiler-dependent. Return a uint32_t instead, and make the
  printing code handle unknown values better by printing "Unknown" and
  the int value instead of not printing any compression.

- Print compressed contents as hex dump, not as string.

- Add compression type "DotNet", which is used (at least) by csc.exe,
  the C# compiler. Also add a lengthy comment describing the stream
  contents (derived from looking at the raw hex contents long enough
  to see the GUIDs, which led me to the roslyn and mono implementations
  for handling this).

- The native injected source dumper was dumping the contents of the
  whole data stream -- but csc.exe writes a stream that's padded with
  zero bytes to the next 512-byte boundary, and the DIA API doesn't
  display those padding bytes. So make NativeInjectedSource::getCode()
  do the same thing.

Differential Revision: https://reviews.llvm.org/D64879

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366386 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nico Weber 2019-07-17 22:59:52 +00:00
parent 08b0610980
commit f45b56db8e
14 changed files with 181 additions and 27 deletions

View File

@ -25,7 +25,7 @@ public:
std::string getFileName() const override;
std::string getObjectFileName() const override;
std::string getVirtualFileName() const override;
PDB_SourceCompression getCompression() const override;
uint32_t getCompression() const override;
std::string getCode() const override;
private:

View File

@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_PDB_IPDBINJECTEDSOURCE_H
#define LLVM_DEBUGINFO_PDB_IPDBINJECTEDSOURCE_H
#include "PDBTypes.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <string>
@ -32,7 +31,10 @@ public:
virtual std::string getFileName() const = 0;
virtual std::string getObjectFileName() const = 0;
virtual std::string getVirtualFileName() const = 0;
virtual PDB_SourceCompression getCompression() const = 0;
// The returned value depends on the PDB producer,
// but 0 is guaranteed to mean "no compression".
// The enum PDB_SourceCompression lists known return values.
virtual uint32_t getCompression() const = 0;
virtual std::string getCode() const = 0;
};
} // namespace pdb

View File

@ -37,13 +37,12 @@ raw_ostream &operator<<(raw_ostream &OS, const PDB_SymType &Tag);
raw_ostream &operator<<(raw_ostream &OS, const PDB_MemberAccess &Access);
raw_ostream &operator<<(raw_ostream &OS, const PDB_UdtType &Type);
raw_ostream &operator<<(raw_ostream &OS, const PDB_Machine &Machine);
raw_ostream &operator<<(raw_ostream &OS,
const PDB_SourceCompression &Compression);
raw_ostream &operator<<(raw_ostream &OS, const Variant &Value);
raw_ostream &operator<<(raw_ostream &OS, const VersionInfo &Version);
raw_ostream &operator<<(raw_ostream &OS, const TagStats &Stats);
raw_ostream& dumpPDBSourceCompression(raw_ostream& OS, uint32_t Compression);
template <typename T>
void dumpSymbolField(raw_ostream &OS, StringRef Name, T Value, int Indent) {

View File

@ -146,11 +146,69 @@ enum class PDB_Machine {
WceMipsV2 = 0x169
};
enum class PDB_SourceCompression {
None,
RunLengthEncoded,
Huffman,
LZ,
// Known compression kinds for injected sources.
//
// A struct with an inner unnamed enum with explicit underlying type results
// in an enum class that can implicitly convert to the underlying type, which
// is convenient for this enum: the enumerators are scoped as
// PDB_SourceCompression::X but compare directly against a raw uint32_t.
struct PDB_SourceCompression {
enum : uint32_t {
// No compression. Produced e.g. by `link.exe /natvis:foo.natvis`.
None,
// Not known what produces this.
RunLengthEncoded,
// Not known what produces this.
Huffman,
// Not known what produces this.
LZ,
// Produced e.g. by `csc /debug`. The encoded data is its own mini-stream
// with the following layout (in little endian):
// GUID LanguageTypeGuid;
// GUID LanguageVendorGuid;
// GUID DocumentTypeGuid;
// GUID HashFunctionGuid;
// uint32_t HashDataSize;
// uint32_t CompressedDataSize;
// Followed by HashDataSize bytes containing a hash checksum,
// followed by CompressedDataSize bytes containing source contents.
//
// CompressedDataSize can be 0, in this case only the hash data is present.
// (CompressedDataSize is != 0 e.g. if `/embed` is passed to csc.exe.)
// The compressed data format is:
// uint32_t UncompressedDataSize;
// If UncompressedDataSize is 0, the data is stored uncompressed and
// CompressedDataSize stores the uncompressed size.
// If UncompressedDataSize is != 0, then the data is in raw deflate
// encoding as described in rfc1951.
//
// A GUID is 16 bytes, stored in the usual
// uint32_t
// uint16_t
// uint16_t
// uint8_t[8]
// layout (4 + 2 + 2 + 8 = 16 bytes).
//
// Well-known GUIDs for LanguageTypeGuid are:
// 63a08714-fc37-11d2-904c-00c04fa302a1 C
// 3a12d0b7-c26c-11d0-b442-00a0244a1dd2 C++
// 3f5162f8-07c6-11d3-9053-00c04fa302a1 C#
// af046cd1-d0e1-11d2-977c-00a0c9b4d50c Cobol
// ab4f38c9-b6e6-43ba-be3b-58080b2ccce3 F#
// 3a12d0b4-c26c-11d0-b442-00a0244a1dd2 Java
// 3a12d0b6-c26c-11d0-b442-00a0244a1dd2 JScript
// af046cd2-d0e1-11d2-977c-00a0c9b4d50c Pascal
// 3a12d0b8-c26c-11d0-b442-00a0244a1dd2 Visual Basic
//
// Well-known GUIDs for LanguageVendorGuid are:
// 994b45c4-e6e9-11d2-903f-00c04fa302a1 Microsoft
//
// Well-known GUIDs for DocumentTypeGuid are:
// 5a869d0b-6611-11d3-bd2a-0000f80849bd Text
//
// Well-known GUIDs for HashFunctionGuid are:
// 406ea660-64cf-4c82-b6f0-42d48172a799 MD5 (HashDataSize is 16)
// ff1816ec-aa5e-4d10-87f7-6f4963833460 SHA1 (HashDataSize is 20)
// 8829d00f-11b8-4213-878b-770e8597ac16 SHA256 (HashDataSize is 32)
DotNet = 101,
};
};
/// These values correspond to the CV_call_e enumeration, and are documented

View File

@ -41,11 +41,11 @@ std::string DIAInjectedSource::getVirtualFileName() const {
&IDiaInjectedSource::get_virtualFilename);
}
PDB_SourceCompression DIAInjectedSource::getCompression() const {
uint32_t DIAInjectedSource::getCompression() const {
DWORD Compression = 0;
if (S_OK != SourceFile->get_sourceCompression(&Compression))
return PDB_SourceCompression::None;
return static_cast<PDB_SourceCompression>(Compression);
return static_cast<uint32_t>(Compression);
}
std::string DIAInjectedSource::getCode() const {

View File

@ -17,14 +17,15 @@ namespace pdb {
namespace {
Expected<std::string> readStreamData(BinaryStream &Stream) {
uint32_t Offset = 0, DataLength = Stream.getLength();
Expected<std::string> readStreamData(BinaryStream &Stream, uint32_t Limit) {
uint32_t Offset = 0, DataLength = std::min(Limit, Stream.getLength());
std::string Result;
Result.reserve(DataLength);
while (Offset < DataLength) {
ArrayRef<uint8_t> Data;
if (auto E = Stream.readLongestContiguousChunk(Offset, Data))
return std::move(E);
Data = Data.take_front(DataLength - Offset);
Offset += Data.size();
Result += toStringRef(Data);
}
@ -62,9 +63,7 @@ public:
return *VName;
}
PDB_SourceCompression getCompression() const override {
return static_cast<PDB_SourceCompression>(Entry.Compression);
}
uint32_t getCompression() const override { return Entry.Compression; }
std::string getCode() const override {
// Get name of stream storing the data.
@ -81,7 +80,7 @@ public:
return "(failed to open data stream)";
}
auto Data = readStreamData(**ExpectedFileStream);
auto Data = readStreamData(**ExpectedFileStream, Entry.FileSize);
if (!Data) {
consumeError(Data.takeError());
return "(failed to read data)";

View File

@ -320,14 +320,17 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
return OS;
}
raw_ostream &llvm::pdb::operator<<(raw_ostream &OS,
const PDB_SourceCompression &Compression) {
// Writes a human-readable description of an injected-source compression
// value to OS and returns OS so the call can be chained.
//
// Compression is a raw uint32_t rather than a PDB_SourceCompression value.
// Each known PDB_SourceCompression enumerator is handled by a
// CASE_OUTPUT_ENUM_CLASS_* macro (defined outside this view; presumably each
// expands to a `case` that prints the enumerator name, or the supplied string
// such as "RLE", and then breaks -- confirm against the macro definitions).
raw_ostream &llvm::pdb::dumpPDBSourceCompression(raw_ostream &OS,
uint32_t Compression) {
switch (Compression) {
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, None, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, Huffman, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, LZ, OS)
CASE_OUTPUT_ENUM_CLASS_STR(PDB_SourceCompression, RunLengthEncoded, "RLE",
OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_SourceCompression, DotNet, OS)
default:
// Any value not listed above is still reported, together with its numeric
// value, instead of printing nothing.
OS << "Unknown (" << Compression << ")";
}
return OS;
}

Binary file not shown.

View File

@ -28,3 +28,48 @@
; NEGATIVE: ---INJECTED SOURCES---
; NEGATIVE-NEXT: There are no injected sources.
; PDB created by running `csc /debug Hello.cs`
; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
; RUN: %p/Inputs/dotnet_hashonly.pdb | FileCheck --check-prefix=HASH %s
; HASH: ---INJECTED SOURCES---
; HASH: C:\src\llvm-mono\Hello.cs (92 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=269413292, compression=DotNet
; HASH-NEXT: Compressed data (
; HASH-NEXT: 0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1 |.bQ?.....S..O....EK......?..O...|
; HASH-NEXT: 0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460 |...Z.f...*....I.....^..M..oIc.4`|
; HASH-NEXT: 0040: 14000000 00000000 17299CBE 74FDDF66 FFCD5E08 CE34A775 D464C611 |.........)..t..f..^..4.u.d..|
; HASH-NEXT: )
; PDB created by running `csc /debug Hello.cs` with Hello.cs smaller than 200 bytes.
; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
; RUN: %p/Inputs/dotnet_contents_uncompressed.pdb | FileCheck --check-prefix=UNCOMP %s
; UNCOMP: ---INJECTED SOURCES---
; UNCOMP: C:\src\llvm-mono\Hello.cs (232 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=323787205, compression=DotNet
; UNCOMP-NEXT: Compressed data (
; UNCOMP-NEXT: 0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1 |.bQ?.....S..O....EK......?..O...|
; UNCOMP-NEXT: 0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460 |...Z.f...*....I.....^..M..oIc.4`|
; UNCOMP-NEXT: 0040: 14000000 8C000000 17299CBE 74FDDF66 FFCD5E08 CE34A775 D464C611 00000000 |.........)..t..f..^..4.u.d......|
; UNCOMP-NEXT: 0060: 6E616D65 73706163 65204865 6C6C6F57 6F726C64 207B0D0A 636C6173 73204865 |namespace HelloWorld {..class He|
; UNCOMP-NEXT: 0080: 6C6C6F20 7B0D0A20 20737461 74696320 766F6964 204D6169 6E282920 7B205379 |llo {.. static void Main() { Sy|
; UNCOMP-NEXT: 00A0: 7374656D 2E436F6E 736F6C65 2E577269 74654C69 6E652822 48656C6C 6F206173 |stem.Console.WriteLine("Hello as|
; UNCOMP-NEXT: 00C0: 64666A6B 6C777763 6F697762 72796669 75667566 20576F72 6C642122 293B207D |dfjklwwcoiwbryfiufuf World!"); }|
; UNCOMP-NEXT: 00E0: 0D0A7D0D 0A7D0D0A |..}..}..|
; UNCOMP-NEXT: )
; PDB created by running `csc /debug Hello.cs` with Hello.cs larger than 200 bytes.
; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
; RUN: %p/Inputs/dotnet_contents_compressed.pdb | FileCheck --check-prefix=COMP %s
; COMP: ---INJECTED SOURCES---
; COMP: C:\src\llvm-mono\Hello.cs (218 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=616104201, compression=DotNet
; COMP-NEXT: Compressed data (
; COMP-NEXT: 0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1 |.bQ?.....S..O....EK......?..O...|
; COMP-NEXT: 0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460 |...Z.f...*....I.....^..M..oIc.4`|
; COMP-NEXT: 0040: 14000000 7E000000 52CD36A0 6A9824CD A3034543 7FA9765E D572DA21 FB000000 |....~...R.6.j.$...EC..v^.r.!....|
; COMP-NEXT: 0060: CB4BCC4D 2D2E484C 4E55F048 CDC9C90F CF2FCA49 51A8E6E5 4ACE492C 2E868881 |.K.M-.HLNU.H...../.IQ...J.I,....|
; COMP-NEXT: 0080: B80A0AC5 25892599 C90A65F9 99290ABE 8999791A 9A0AD50A C195C525 A9B97ACE |....%.%...e..)....y........%..z.|
; COMP-NEXT: 00A0: F979C5F9 39A97AE1 459925A9 3E9979A9 1A4A109D 89C52969 59D939E5 E5C9F999 |.y..9.z.E.%.>.y..J....)iY.9.....|
; COMP-NEXT: 00C0: E5494595 6999A569 A5690321 0CF698A2 92A6B542 2D2F1704 0100 |.IE.i..i.i.!.......B-/....|
; COMP-NEXT: )

View File

@ -27,3 +27,48 @@
; NEGATIVE: ---INJECTED SOURCES---
; NEGATIVE-NEXT: There are no injected sources.
; PDB created by running `csc /debug Hello.cs`
; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
; RUN: %p/Inputs/dotnet_hashonly.pdb | FileCheck --check-prefix=HASH %s
; HASH: ---INJECTED SOURCES---
; HASH: C:\src\llvm-mono\Hello.cs (92 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=269413292, compression=DotNet
; HASH-NEXT: Compressed data (
; HASH-NEXT: 0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1 |.bQ?.....S..O....EK......?..O...|
; HASH-NEXT: 0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460 |...Z.f...*....I.....^..M..oIc.4`|
; HASH-NEXT: 0040: 14000000 00000000 17299CBE 74FDDF66 FFCD5E08 CE34A775 D464C611 |.........)..t..f..^..4.u.d..|
; HASH-NEXT: )
; PDB created by running `csc /debug Hello.cs` with Hello.cs smaller than 200 bytes.
; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
; RUN: %p/Inputs/dotnet_contents_uncompressed.pdb | FileCheck --check-prefix=UNCOMP %s
; UNCOMP: ---INJECTED SOURCES---
; UNCOMP: C:\src\llvm-mono\Hello.cs (232 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=323787205, compression=DotNet
; UNCOMP-NEXT: Compressed data (
; UNCOMP-NEXT: 0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1 |.bQ?.....S..O....EK......?..O...|
; UNCOMP-NEXT: 0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460 |...Z.f...*....I.....^..M..oIc.4`|
; UNCOMP-NEXT: 0040: 14000000 8C000000 17299CBE 74FDDF66 FFCD5E08 CE34A775 D464C611 00000000 |.........)..t..f..^..4.u.d......|
; UNCOMP-NEXT: 0060: 6E616D65 73706163 65204865 6C6C6F57 6F726C64 207B0D0A 636C6173 73204865 |namespace HelloWorld {..class He|
; UNCOMP-NEXT: 0080: 6C6C6F20 7B0D0A20 20737461 74696320 766F6964 204D6169 6E282920 7B205379 |llo {.. static void Main() { Sy|
; UNCOMP-NEXT: 00A0: 7374656D 2E436F6E 736F6C65 2E577269 74654C69 6E652822 48656C6C 6F206173 |stem.Console.WriteLine("Hello as|
; UNCOMP-NEXT: 00C0: 64666A6B 6C777763 6F697762 72796669 75667566 20576F72 6C642122 293B207D |dfjklwwcoiwbryfiufuf World!"); }|
; UNCOMP-NEXT: 00E0: 0D0A7D0D 0A7D0D0A |..}..}..|
; UNCOMP-NEXT: )
; PDB created by running `csc /debug Hello.cs` with Hello.cs larger than 200 bytes.
; RUN: llvm-pdbutil pretty -native -injected-sources -injected-source-content \
; RUN: %p/Inputs/dotnet_contents_compressed.pdb | FileCheck --check-prefix=COMP %s
; COMP: ---INJECTED SOURCES---
; COMP: C:\src\llvm-mono\Hello.cs (218 bytes): obj=<null>, vname=c:\src\llvm-mono\hello.cs, crc=616104201, compression=DotNet
; COMP-NEXT: Compressed data (
; COMP-NEXT: 0000: F862513F C607D311 905300C0 4FA302A1 C4454B99 E9E6D211 903F00C0 4FA302A1 |.bQ?.....S..O....EK......?..O...|
; COMP-NEXT: 0020: 0B9D865A 1166D311 BD2A0000 F80849BD EC1618FF 5EAA104D 87F76F49 63833460 |...Z.f...*....I.....^..M..oIc.4`|
; COMP-NEXT: 0040: 14000000 7E000000 52CD36A0 6A9824CD A3034543 7FA9765E D572DA21 FB000000 |....~...R.6.j.$...EC..v^.r.!....|
; COMP-NEXT: 0060: CB4BCC4D 2D2E484C 4E55F048 CDC9C90F CF2FCA49 51A8E6E5 4ACE492C 2E868881 |.K.M-.HLNU.H...../.IQ...J.I,....|
; COMP-NEXT: 0080: B80A0AC5 25892599 C90A65F9 99290ABE 8999791A 9A0AD50A C195C525 A9B97ACE |....%.%...e..)....y........%..z.|
; COMP-NEXT: 00A0: F979C5F9 39A97AE1 459925A9 3E9979A9 1A4A109D 89C52969 59D939E5 E5C9F999 |.y..9.z.E.%.>.y..J....)iY.9.....|
; COMP-NEXT: 00C0: E5494595 6999A569 A5690321 0CF698A2 92A6B542 2D2F1704 0100 |.IE.i..i.i.!.......B-/....|
; COMP-NEXT: )

View File

@ -132,8 +132,7 @@ struct AutoIndent {
template <class T>
inline raw_ostream &operator<<(LinePrinter &Printer, const T &Item) {
Printer.getStream() << Item;
return Printer.getStream();
return Printer.getStream() << Item;
}
enum class PDB_ColorItem {

View File

@ -947,9 +947,6 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
std::string VFName = stringOr(IS->getVirtualFileName(), "<null>");
uint32_t CRC = IS->getCrc32();
std::string CompressionStr;
llvm::raw_string_ostream Stream(CompressionStr);
Stream << IS->getCompression();
WithColor(Printer, PDB_ColorItem::Path).get() << File;
Printer << " (";
WithColor(Printer, PDB_ColorItem::LiteralValue).get() << Size;
@ -968,7 +965,9 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
Printer << ", ";
WithColor(Printer, PDB_ColorItem::Keyword).get() << "compression";
Printer << "=";
WithColor(Printer, PDB_ColorItem::LiteralValue).get() << Stream.str();
dumpPDBSourceCompression(
WithColor(Printer, PDB_ColorItem::LiteralValue).get(),
IS->getCompression());
if (!opts::pretty::ShowInjectedSourceContent)
continue;
@ -977,7 +976,12 @@ static void dumpInjectedSources(LinePrinter &Printer, IPDBSession &Session) {
int Indent = Printer.getIndentLevel();
Printer.Unindent(Indent);
Printer.printLine(IS->getCode());
if (IS->getCompression() == PDB_SourceCompression::None)
Printer.printLine(IS->getCode());
else
Printer.formatBinary("Compressed data",
arrayRefFromStringRef(IS->getCode()),
/*StartOffset=*/0);
// Re-indent back to the original level.
Printer.Indent(Indent);