llvm/lib/Bitcode/Writer/ValueEnumerator.h
Duncan P. N. Exon Smith d7084fcebd Bitcode: Collect all MDString records into a single blob
Optimize output of MDStrings in bitcode.  This emits them in big blocks
(currently 1024) in a pair of records:
  - BULK_STRING_SIZES: the sizes of the strings in the block, and
  - BULK_STRING_DATA: a single blob, which is the concatenation of all
    the strings.

Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this
should (a) slightly reduce bitcode size, since there is less record
overhead, and (b) greatly improve reading speed, since blobs are super
cheap to deserialize.

I needed to add support for blobs to streaming input to get the test
suite passing.
  - StreamingMemoryObject::getPointer reads ahead and returns the
    address of the blob.
  - To avoid a possible reallocation of StreamingMemoryObject::Bytes,
    BitstreamCursor::readRecord needs to move the call to JumpToEnd
    forward so that getPointer is the last bitstream operation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@264409 91177308-0d34-0410-b5e6-96231b3b80d8
2016-03-25 14:40:18 +00:00

217 lines
7.0 KiB
C++

//===-- Bitcode/Writer/ValueEnumerator.h - Number values --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class gives values and types Unique ID's.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_BITCODE_WRITER_VALUEENUMERATOR_H
#define LLVM_LIB_BITCODE_WRITER_VALUEENUMERATOR_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/UseListOrder.h"
#include <vector>
namespace llvm {
class Type;
class Value;
class Instruction;
class BasicBlock;
class Comdat;
class Function;
class Module;
class Metadata;
class LocalAsMetadata;
class MDNode;
class NamedMDNode;
class AttributeSet;
class ValueSymbolTable;
class MDSymbolTable;
class raw_ostream;
class ValueEnumerator {
public:
typedef std::vector<Type*> TypeList;
// For each value, we remember its Value* and occurrence frequency.
typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
UseListOrderStack UseListOrders;
private:
typedef DenseMap<Type*, unsigned> TypeMapType;
TypeMapType TypeMap;
TypeList Types;
typedef DenseMap<const Value*, unsigned> ValueMapType;
ValueMapType ValueMap;
ValueList Values;
typedef UniqueVector<const Comdat *> ComdatSetType;
ComdatSetType Comdats;
std::vector<const Metadata *> MDs;
SmallVector<const LocalAsMetadata *, 8> FunctionLocalMDs;
typedef DenseMap<const Metadata *, unsigned> MetadataMapType;
MetadataMapType MetadataMap;
unsigned NumMDStrings = 0;
bool ShouldPreserveUseListOrder;
typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType;
AttributeGroupMapType AttributeGroupMap;
std::vector<AttributeSet> AttributeGroups;
typedef DenseMap<AttributeSet, unsigned> AttributeMapType;
AttributeMapType AttributeMap;
std::vector<AttributeSet> Attribute;
/// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by
/// the "getGlobalBasicBlockID" method.
mutable DenseMap<const BasicBlock*, unsigned> GlobalBasicBlockIDs;
typedef DenseMap<const Instruction*, unsigned> InstructionMapType;
InstructionMapType InstructionMap;
unsigned InstructionCount;
/// BasicBlocks - This contains all the basic blocks for the currently
/// incorporated function. Their reverse mapping is stored in ValueMap.
std::vector<const BasicBlock*> BasicBlocks;
/// When a function is incorporated, this is the size of the Values list
/// before incorporation.
unsigned NumModuleValues;
/// When a function is incorporated, this is the size of the Metadatas list
/// before incorporation.
unsigned NumModuleMDs;
unsigned FirstFuncConstantID;
unsigned FirstInstID;
ValueEnumerator(const ValueEnumerator &) = delete;
void operator=(const ValueEnumerator &) = delete;
public:
ValueEnumerator(const Module &M, bool ShouldPreserveUseListOrder);
void dump() const;
void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
void print(raw_ostream &OS, const MetadataMapType &Map,
const char *Name) const;
unsigned getValueID(const Value *V) const;
unsigned getMetadataID(const Metadata *MD) const {
auto ID = getMetadataOrNullID(MD);
assert(ID != 0 && "Metadata not in slotcalculator!");
return ID - 1;
}
unsigned getMetadataOrNullID(const Metadata *MD) const {
return MetadataMap.lookup(MD);
}
unsigned numMDs() const { return MDs.size(); }
bool shouldPreserveUseListOrder() const { return ShouldPreserveUseListOrder; }
unsigned getTypeID(Type *T) const {
TypeMapType::const_iterator I = TypeMap.find(T);
assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
return I->second-1;
}
unsigned getInstructionID(const Instruction *I) const;
void setInstructionID(const Instruction *I);
unsigned getAttributeID(AttributeSet PAL) const {
if (PAL.isEmpty()) return 0; // Null maps to zero.
AttributeMapType::const_iterator I = AttributeMap.find(PAL);
assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
return I->second;
}
unsigned getAttributeGroupID(AttributeSet PAL) const {
if (PAL.isEmpty()) return 0; // Null maps to zero.
AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(PAL);
assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!");
return I->second;
}
/// getFunctionConstantRange - Return the range of values that corresponds to
/// function-local constants.
void getFunctionConstantRange(unsigned &Start, unsigned &End) const {
Start = FirstFuncConstantID;
End = FirstInstID;
}
const ValueList &getValues() const { return Values; }
const std::vector<const Metadata *> &getMDs() const { return MDs; }
ArrayRef<const Metadata *> getMDStrings() const {
return makeArrayRef(MDs).slice(0, NumMDStrings);
}
ArrayRef<const Metadata *> getNonMDStrings() const {
return makeArrayRef(MDs).slice(NumMDStrings);
}
const SmallVectorImpl<const LocalAsMetadata *> &getFunctionLocalMDs() const {
return FunctionLocalMDs;
}
const TypeList &getTypes() const { return Types; }
const std::vector<const BasicBlock*> &getBasicBlocks() const {
return BasicBlocks;
}
const std::vector<AttributeSet> &getAttributes() const {
return Attribute;
}
const std::vector<AttributeSet> &getAttributeGroups() const {
return AttributeGroups;
}
const ComdatSetType &getComdats() const { return Comdats; }
unsigned getComdatID(const Comdat *C) const;
/// getGlobalBasicBlockID - This returns the function-specific ID for the
/// specified basic block. This is relatively expensive information, so it
/// should only be used by rare constructs such as address-of-label.
unsigned getGlobalBasicBlockID(const BasicBlock *BB) const;
/// incorporateFunction/purgeFunction - If you'd like to deal with a function,
/// use these two methods to get its data into the ValueEnumerator!
///
void incorporateFunction(const Function &F);
void purgeFunction();
uint64_t computeBitsRequiredForTypeIndicies() const;
private:
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
// Reorder the reachable metadata. This is not just an optimization, but is
// mandatory for emitting MDString correctly.
void organizeMetadata();
void EnumerateMDNodeOperands(const MDNode *N);
void EnumerateMetadata(const Metadata *MD);
void EnumerateFunctionLocalMetadata(const LocalAsMetadata *Local);
void EnumerateNamedMDNode(const NamedMDNode *NMD);
void EnumerateValue(const Value *V);
void EnumerateType(Type *T);
void EnumerateOperandType(const Value *V);
void EnumerateAttributes(AttributeSet PAL);
void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
void EnumerateNamedMetadata(const Module &M);
};
} // End llvm namespace
#endif