Files
archived-llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
Zachary Turner 505c76a926 [PDB] Don't build the entire source file list up front.
I tried to run llvm-pdbdump on a very large (~1.5GB) PDB to
try and identify show-stopping performance problems.  This
patch addresses the first such problem.

When loading the DBI stream, before anyone has even tried to
access a single record, we build an in memory map of every
source file for every module.  In the particular PDB I was
using, this was over 85 million files.  Specifically, the
complexity is O(m*n) where m is the number of modules and
n is the average number of source files (including headers)
per module.

The whole reason for doing this was so that we could have
constant time access to any module and any of its source
file lists.  However, we can still get O(1) access to the
source file list for a given module with a simple O(m)
precomputation, and access to the list of modules is
already O(1) anyway.

So this patch reduces the O(m*n) up-front precomputation
to an O(m) one, where n is ~6,500 and n*m is about 85 million
in my pathological test case.

Differential Revision: https://reviews.llvm.org/D32870

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302205 91177308-0d34-0410-b5e6-96231b3b80d8
2017-05-04 23:53:29 +00:00

114 lines
3.8 KiB
C++

//===- DbiModuleList.h - PDB module information list ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H
#define LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h"
#include "llvm/Support/BinaryStreamArray.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <vector>
namespace llvm {
namespace pdb {
// Forward declarations. DbiModuleSourceFilesIterator refers to DbiModuleList
// before that class is defined, and DbiModuleList only stores a pointer to
// FileInfoSubstreamHeader, so an incomplete type is sufficient for both.
class DbiModuleList;
struct FileInfoSubstreamHeader;
/// Random-access iterator whose value type is StringRef; DbiModuleList hands
/// out ranges of these from source_files() for a single module.
///
/// A position is identified by the owning DbiModuleList together with a module
/// index (Modi) and a file index (Filei). Dereferencing returns the cached
/// ThisValue member. All non-inline members are defined out of line, so their
/// exact semantics are not visible in this header.
class DbiModuleSourceFilesIterator
    : public iterator_facade_base<DbiModuleSourceFilesIterator,
                                  std::random_access_iterator_tag, StringRef> {
  using BaseType =
      iterator_facade_base<DbiModuleSourceFilesIterator,
                           std::random_access_iterator_tag, StringRef>;

public:
  /// Creates an iterator bound to \p Modules at module index \p Modi and file
  /// index \p Filei.
  DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi,
                               uint16_t Filei);

  /// Creates an iterator that is bound to no module list (Modules is null and
  /// both indices are zero).
  DbiModuleSourceFilesIterator() = default;

  // The copy constructor is spelled out because copy assignment is
  // user-declared below; relying on the implicitly generated copy constructor
  // in that situation is deprecated and triggers -Wdeprecated-copy.
  DbiModuleSourceFilesIterator(const DbiModuleSourceFilesIterator &R) = default;
  DbiModuleSourceFilesIterator &
  operator=(const DbiModuleSourceFilesIterator &R) = default;

  bool operator==(const DbiModuleSourceFilesIterator &R) const;

  /// Returns the cached value for the current position.
  const StringRef &operator*() const { return ThisValue; }
  StringRef &operator*() { return ThisValue; }

  bool operator<(const DbiModuleSourceFilesIterator &RHS) const;
  std::ptrdiff_t operator-(const DbiModuleSourceFilesIterator &R) const;
  DbiModuleSourceFilesIterator &operator+=(std::ptrdiff_t N);
  DbiModuleSourceFilesIterator &operator-=(std::ptrdiff_t N);

private:
  // Helpers defined out of line. NOTE(review): presumably setValue() refreshes
  // ThisValue for the current (Modi, Filei) position and the is*() predicates
  // classify end/comparable iterators -- confirm against DbiModuleList.cpp.
  void setValue();
  bool isEnd() const;
  bool isCompatible(const DbiModuleSourceFilesIterator &R) const;
  bool isUniversalEnd() const;

  StringRef ThisValue;                  // Value returned by operator*.
  const DbiModuleList *Modules{nullptr}; // Not owned; null when default-built.
  uint32_t Modi{0};                     // Module index.
  uint16_t Filei{0};                    // File index.
};
/// Holds the module descriptors and the per-module source file tables that
/// are set up by initialize() from two stream references.
///
/// Member functions are defined out of line; only their signatures are
/// visible in this header.
class DbiModuleList {
  // Grants the source file iterator access to the private members below.
  friend class DbiModuleSourceFilesIterator;

public:
  /// Sets up this object from the \p ModInfo and \p FileInfo stream
  /// references. Returns an Error describing any failure.
  Error initialize(BinaryStreamRef ModInfo, BinaryStreamRef FileInfo);

  /// Returns the file name for \p Index, or an error.
  /// NOTE(review): presumably Index is a position in FileNameOffsets --
  /// confirm against the implementation.
  Expected<StringRef> getFileName(uint32_t Index) const;

  uint32_t getModuleCount() const;

  /// Source file count with no module argument.
  /// NOTE(review): presumably the total across all modules -- confirm.
  uint32_t getSourceFileCount() const;

  /// Source file count for the module at index \p Modi.
  uint16_t getSourceFileCount(uint32_t Modi) const;

  /// Returns an iterator range over the source files of module \p Modi.
  iterator_range<DbiModuleSourceFilesIterator>
  source_files(uint32_t Modi) const;

  /// Returns, by value, the descriptor of the module at index \p Modi.
  DbiModuleDescriptor getModuleDescriptor(uint32_t Modi) const;

private:
  Error initializeModInfo(BinaryStreamRef ModInfo);
  Error initializeFileInfo(BinaryStreamRef FileInfo);

  VarStreamArray<DbiModuleDescriptor> Descriptors;

  FixedStreamArray<support::little32_t> FileNameOffsets;
  FixedStreamArray<support::ulittle16_t> ModFileCountArray;

  // A module owns several file names, each reachable through a file index.
  // That index selects an entry of FileNameOffsets, and the entry holds the
  // absolute offset of the name inside NamesBuffer. For every module this
  // vector records the index of its first entry in FileNameOffsets; how many
  // entries belong to the module is recorded in ModFileCountArray.
  std::vector<uint32_t> ModuleInitialFileIndex;

  // Descriptors is walked once up front and the offset of every item is
  // remembered here, which is what makes random access into it possible.
  std::vector<uint32_t> ModuleDescriptorOffsets;

  const FileInfoSubstreamHeader *FileInfoHeader{nullptr};

  BinaryStreamRef ModInfoSubstream;
  BinaryStreamRef FileInfoSubstream;
  BinaryStreamRef NamesBuffer;
};
}
}
#endif // LLVM_DEBUGINFO_PDB_RAW_DBIMODULELIST_H