mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-16 16:37:42 +00:00
OnDiskHashTable: Expect the Info type to declare the offset type
This changes the on-disk hash to get the type to use for offsets from the Info type, so that clients can be more flexible with the size of table they support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206643 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
acf59d9738
commit
abc5eea499
@ -40,11 +40,12 @@ namespace llvm {
|
|||||||
/// typedef ExampleData data_type; // Must be copy constructible
|
/// typedef ExampleData data_type; // Must be copy constructible
|
||||||
/// typedef ExampleData &data_type_ref;
|
/// typedef ExampleData &data_type_ref;
|
||||||
/// typedef uint32_t hash_value_type; // The type the hash function returns.
|
/// typedef uint32_t hash_value_type; // The type the hash function returns.
|
||||||
|
/// typedef uint32_t offset_type; // The type for offsets into the table.
|
||||||
///
|
///
|
||||||
/// /// Calculate the hash for Key
|
/// /// Calculate the hash for Key
|
||||||
/// static hash_value_type ComputeHash(key_type_ref Key);
|
/// static hash_value_type ComputeHash(key_type_ref Key);
|
||||||
/// /// Return the lengths, in bytes, of the given Key/Data pair.
|
/// /// Return the lengths, in bytes, of the given Key/Data pair.
|
||||||
/// static std::pair<unsigned, unsigned>
|
/// static std::pair<offset_type, offset_type>
|
||||||
/// EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data);
|
/// EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data);
|
||||||
/// /// Write Key to Out. KeyLen is the length from EmitKeyDataLength.
|
/// /// Write Key to Out. KeyLen is the length from EmitKeyDataLength.
|
||||||
/// static void EmitKey(raw_ostream &Out, key_type_ref Key, unsigned KeyLen);
|
/// static void EmitKey(raw_ostream &Out, key_type_ref Key, unsigned KeyLen);
|
||||||
@ -54,8 +55,9 @@ namespace llvm {
|
|||||||
/// };
|
/// };
|
||||||
/// \endcode
|
/// \endcode
|
||||||
template <typename Info> class OnDiskChainedHashTableGenerator {
|
template <typename Info> class OnDiskChainedHashTableGenerator {
|
||||||
unsigned NumBuckets;
|
typedef typename Info::offset_type offset_type;
|
||||||
unsigned NumEntries;
|
offset_type NumBuckets;
|
||||||
|
offset_type NumEntries;
|
||||||
llvm::BumpPtrAllocator BA;
|
llvm::BumpPtrAllocator BA;
|
||||||
|
|
||||||
/// \brief A single item in the hash table.
|
/// \brief A single item in the hash table.
|
||||||
@ -74,7 +76,7 @@ template <typename Info> class OnDiskChainedHashTableGenerator {
|
|||||||
/// \brief A linked list of values in a particular hash bucket.
|
/// \brief A linked list of values in a particular hash bucket.
|
||||||
class Bucket {
|
class Bucket {
|
||||||
public:
|
public:
|
||||||
uint32_t Off;
|
offset_type Off;
|
||||||
Item *Head;
|
Item *Head;
|
||||||
unsigned Length;
|
unsigned Length;
|
||||||
|
|
||||||
@ -96,7 +98,7 @@ private:
|
|||||||
void resize(size_t NewSize) {
|
void resize(size_t NewSize) {
|
||||||
Bucket *NewBuckets = (Bucket *)std::calloc(NewSize, sizeof(Bucket));
|
Bucket *NewBuckets = (Bucket *)std::calloc(NewSize, sizeof(Bucket));
|
||||||
// Populate NewBuckets with the old entries.
|
// Populate NewBuckets with the old entries.
|
||||||
for (unsigned I = 0; I < NumBuckets; ++I)
|
for (size_t I = 0; I < NumBuckets; ++I)
|
||||||
for (Item *E = Buckets[I].Head; E;) {
|
for (Item *E = Buckets[I].Head; E;) {
|
||||||
Item *N = E->Next;
|
Item *N = E->Next;
|
||||||
E->Next = 0;
|
E->Next = 0;
|
||||||
@ -131,7 +133,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// \brief Emit the table to Out, which must not be at offset 0.
|
/// \brief Emit the table to Out, which must not be at offset 0.
|
||||||
uint32_t Emit(raw_ostream &Out) {
|
offset_type Emit(raw_ostream &Out) {
|
||||||
Info InfoObj;
|
Info InfoObj;
|
||||||
return Emit(Out, InfoObj);
|
return Emit(Out, InfoObj);
|
||||||
}
|
}
|
||||||
@ -139,12 +141,12 @@ public:
|
|||||||
/// \brief Emit the table to Out, which must not be at offset 0.
|
/// \brief Emit the table to Out, which must not be at offset 0.
|
||||||
///
|
///
|
||||||
/// Uses the provided Info instead of a stack allocated one.
|
/// Uses the provided Info instead of a stack allocated one.
|
||||||
uint32_t Emit(raw_ostream &Out, Info &InfoObj) {
|
offset_type Emit(raw_ostream &Out, Info &InfoObj) {
|
||||||
using namespace llvm::support;
|
using namespace llvm::support;
|
||||||
endian::Writer<little> LE(Out);
|
endian::Writer<little> LE(Out);
|
||||||
|
|
||||||
// Emit the payload of the table.
|
// Emit the payload of the table.
|
||||||
for (unsigned I = 0; I < NumBuckets; ++I) {
|
for (size_t I = 0; I < NumBuckets; ++I) {
|
||||||
Bucket &B = Buckets[I];
|
Bucket &B = Buckets[I];
|
||||||
if (!B.Head)
|
if (!B.Head)
|
||||||
continue;
|
continue;
|
||||||
@ -160,7 +162,7 @@ public:
|
|||||||
// Write out the entries in the bucket.
|
// Write out the entries in the bucket.
|
||||||
for (Item *I = B.Head; I; I = I->Next) {
|
for (Item *I = B.Head; I; I = I->Next) {
|
||||||
LE.write<typename Info::hash_value_type>(I->Hash);
|
LE.write<typename Info::hash_value_type>(I->Hash);
|
||||||
const std::pair<unsigned, unsigned> &Len =
|
const std::pair<offset_type, offset_type> &Len =
|
||||||
InfoObj.EmitKeyDataLength(Out, I->Key, I->Data);
|
InfoObj.EmitKeyDataLength(Out, I->Key, I->Data);
|
||||||
InfoObj.EmitKey(Out, I->Key, Len.first);
|
InfoObj.EmitKey(Out, I->Key, Len.first);
|
||||||
InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
|
InfoObj.EmitData(Out, I->Key, I->Data, Len.second);
|
||||||
@ -168,17 +170,17 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Pad with zeros so that we can start the hashtable at an aligned address.
|
// Pad with zeros so that we can start the hashtable at an aligned address.
|
||||||
uint32_t TableOff = Out.tell();
|
offset_type TableOff = Out.tell();
|
||||||
uint64_t N = llvm::OffsetToAlignment(TableOff, alignOf<uint32_t>());
|
uint64_t N = llvm::OffsetToAlignment(TableOff, alignOf<offset_type>());
|
||||||
TableOff += N;
|
TableOff += N;
|
||||||
while (N--)
|
while (N--)
|
||||||
LE.write<uint8_t>(0);
|
LE.write<uint8_t>(0);
|
||||||
|
|
||||||
// Emit the hashtable itself.
|
// Emit the hashtable itself.
|
||||||
LE.write<uint32_t>(NumBuckets);
|
LE.write<offset_type>(NumBuckets);
|
||||||
LE.write<uint32_t>(NumEntries);
|
LE.write<offset_type>(NumEntries);
|
||||||
for (unsigned I = 0; I < NumBuckets; ++I)
|
for (size_t I = 0; I < NumBuckets; ++I)
|
||||||
LE.write<uint32_t>(Buckets[I].Off);
|
LE.write<offset_type>(Buckets[I].Off);
|
||||||
|
|
||||||
return TableOff;
|
return TableOff;
|
||||||
}
|
}
|
||||||
@ -207,6 +209,7 @@ public:
|
|||||||
/// typedef ExampleInternalKey internal_key_type; // The stored key type.
|
/// typedef ExampleInternalKey internal_key_type; // The stored key type.
|
||||||
/// typedef ExampleKey external_key_type; // The type to pass to find().
|
/// typedef ExampleKey external_key_type; // The type to pass to find().
|
||||||
/// typedef uint32_t hash_value_type; // The type the hash function returns.
|
/// typedef uint32_t hash_value_type; // The type the hash function returns.
|
||||||
|
/// typedef uint32_t offset_type; // The type for offsets into the table.
|
||||||
///
|
///
|
||||||
/// /// Compare two keys for equality.
|
/// /// Compare two keys for equality.
|
||||||
/// static bool EqualKey(internal_key_type &Key1, internal_key_type &Key2);
|
/// static bool EqualKey(internal_key_type &Key1, internal_key_type &Key2);
|
||||||
@ -219,7 +222,7 @@ public:
|
|||||||
/// static const internal_key_type &GetInternalKey(external_key_type &EKey);
|
/// static const internal_key_type &GetInternalKey(external_key_type &EKey);
|
||||||
/// /// Read the key and data length from Buffer, leaving it pointing at the
|
/// /// Read the key and data length from Buffer, leaving it pointing at the
|
||||||
/// /// following byte.
|
/// /// following byte.
|
||||||
/// static std::pair<unsigned, unsigned>
|
/// static std::pair<offset_type, offset_type>
|
||||||
/// ReadKeyDataLength(const unsigned char *&Buffer);
|
/// ReadKeyDataLength(const unsigned char *&Buffer);
|
||||||
/// /// Read the key from Buffer, given the KeyLen as reported from
|
/// /// Read the key from Buffer, given the KeyLen as reported from
|
||||||
/// /// ReadKeyDataLength.
|
/// /// ReadKeyDataLength.
|
||||||
@ -232,8 +235,8 @@ public:
|
|||||||
/// };
|
/// };
|
||||||
/// \endcode
|
/// \endcode
|
||||||
template <typename Info> class OnDiskChainedHashTable {
|
template <typename Info> class OnDiskChainedHashTable {
|
||||||
const unsigned NumBuckets;
|
const typename Info::offset_type NumBuckets;
|
||||||
const unsigned NumEntries;
|
const typename Info::offset_type NumEntries;
|
||||||
const unsigned char *const Buckets;
|
const unsigned char *const Buckets;
|
||||||
const unsigned char *const Base;
|
const unsigned char *const Base;
|
||||||
Info InfoObj;
|
Info InfoObj;
|
||||||
@ -243,8 +246,9 @@ public:
|
|||||||
typedef typename Info::external_key_type external_key_type;
|
typedef typename Info::external_key_type external_key_type;
|
||||||
typedef typename Info::data_type data_type;
|
typedef typename Info::data_type data_type;
|
||||||
typedef typename Info::hash_value_type hash_value_type;
|
typedef typename Info::hash_value_type hash_value_type;
|
||||||
|
typedef typename Info::offset_type offset_type;
|
||||||
|
|
||||||
OnDiskChainedHashTable(unsigned NumBuckets, unsigned NumEntries,
|
OnDiskChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
|
||||||
const unsigned char *Buckets,
|
const unsigned char *Buckets,
|
||||||
const unsigned char *Base,
|
const unsigned char *Base,
|
||||||
const Info &InfoObj = Info())
|
const Info &InfoObj = Info())
|
||||||
@ -254,8 +258,8 @@ public:
|
|||||||
"'buckets' must have a 4-byte alignment");
|
"'buckets' must have a 4-byte alignment");
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned getNumBuckets() const { return NumBuckets; }
|
offset_type getNumBuckets() const { return NumBuckets; }
|
||||||
unsigned getNumEntries() const { return NumEntries; }
|
offset_type getNumEntries() const { return NumEntries; }
|
||||||
const unsigned char *getBase() const { return Base; }
|
const unsigned char *getBase() const { return Base; }
|
||||||
const unsigned char *getBuckets() const { return Buckets; }
|
const unsigned char *getBuckets() const { return Buckets; }
|
||||||
|
|
||||||
@ -287,11 +291,11 @@ public:
|
|||||||
const internal_key_type &IKey = InfoObj.GetInternalKey(EKey);
|
const internal_key_type &IKey = InfoObj.GetInternalKey(EKey);
|
||||||
hash_value_type KeyHash = InfoObj.ComputeHash(IKey);
|
hash_value_type KeyHash = InfoObj.ComputeHash(IKey);
|
||||||
|
|
||||||
// Each bucket is just a 32-bit offset into the hash table file.
|
// Each bucket is just an offset into the hash table file.
|
||||||
unsigned Idx = KeyHash & (NumBuckets - 1);
|
offset_type Idx = KeyHash & (NumBuckets - 1);
|
||||||
const unsigned char *Bucket = Buckets + sizeof(uint32_t) * Idx;
|
const unsigned char *Bucket = Buckets + sizeof(offset_type) * Idx;
|
||||||
|
|
||||||
unsigned Offset = endian::readNext<uint32_t, little, aligned>(Bucket);
|
offset_type Offset = endian::readNext<offset_type, little, aligned>(Bucket);
|
||||||
if (Offset == 0)
|
if (Offset == 0)
|
||||||
return iterator(); // Empty bucket.
|
return iterator(); // Empty bucket.
|
||||||
const unsigned char *Items = Base + Offset;
|
const unsigned char *Items = Base + Offset;
|
||||||
@ -306,8 +310,9 @@ public:
|
|||||||
endian::readNext<hash_value_type, little, unaligned>(Items);
|
endian::readNext<hash_value_type, little, unaligned>(Items);
|
||||||
|
|
||||||
// Determine the length of the key and the data.
|
// Determine the length of the key and the data.
|
||||||
const std::pair<unsigned, unsigned> &L = Info::ReadKeyDataLength(Items);
|
const std::pair<offset_type, offset_type> &L =
|
||||||
unsigned ItemLen = L.first + L.second;
|
Info::ReadKeyDataLength(Items);
|
||||||
|
offset_type ItemLen = L.first + L.second;
|
||||||
|
|
||||||
// Compare the hashes. If they are not the same, skip the entry entirely.
|
// Compare the hashes. If they are not the same, skip the entry entirely.
|
||||||
if (ItemHash != KeyHash) {
|
if (ItemHash != KeyHash) {
|
||||||
@ -353,8 +358,10 @@ public:
|
|||||||
assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
|
assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
|
||||||
"buckets should be 4-byte aligned.");
|
"buckets should be 4-byte aligned.");
|
||||||
|
|
||||||
unsigned NumBuckets = endian::readNext<uint32_t, little, aligned>(Buckets);
|
offset_type NumBuckets =
|
||||||
unsigned NumEntries = endian::readNext<uint32_t, little, aligned>(Buckets);
|
endian::readNext<offset_type, little, aligned>(Buckets);
|
||||||
|
offset_type NumEntries =
|
||||||
|
endian::readNext<offset_type, little, aligned>(Buckets);
|
||||||
return new OnDiskChainedHashTable<Info>(NumBuckets, NumEntries, Buckets,
|
return new OnDiskChainedHashTable<Info>(NumBuckets, NumEntries, Buckets,
|
||||||
Base, InfoObj);
|
Base, InfoObj);
|
||||||
}
|
}
|
||||||
@ -373,8 +380,9 @@ public:
|
|||||||
typedef typename base_type::external_key_type external_key_type;
|
typedef typename base_type::external_key_type external_key_type;
|
||||||
typedef typename base_type::data_type data_type;
|
typedef typename base_type::data_type data_type;
|
||||||
typedef typename base_type::hash_value_type hash_value_type;
|
typedef typename base_type::hash_value_type hash_value_type;
|
||||||
|
typedef typename base_type::offset_type offset_type;
|
||||||
|
|
||||||
OnDiskIterableChainedHashTable(unsigned NumBuckets, unsigned NumEntries,
|
OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries,
|
||||||
const unsigned char *Buckets,
|
const unsigned char *Buckets,
|
||||||
const unsigned char *Payload,
|
const unsigned char *Payload,
|
||||||
const unsigned char *Base,
|
const unsigned char *Base,
|
||||||
@ -385,14 +393,14 @@ public:
|
|||||||
/// \brief Iterates over all of the keys in the table.
|
/// \brief Iterates over all of the keys in the table.
|
||||||
class key_iterator {
|
class key_iterator {
|
||||||
const unsigned char *Ptr;
|
const unsigned char *Ptr;
|
||||||
unsigned NumItemsInBucketLeft;
|
offset_type NumItemsInBucketLeft;
|
||||||
unsigned NumEntriesLeft;
|
offset_type NumEntriesLeft;
|
||||||
Info *InfoObj;
|
Info *InfoObj;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef external_key_type value_type;
|
typedef external_key_type value_type;
|
||||||
|
|
||||||
key_iterator(const unsigned char *const Ptr, unsigned NumEntries,
|
key_iterator(const unsigned char *const Ptr, offset_type NumEntries,
|
||||||
Info *InfoObj)
|
Info *InfoObj)
|
||||||
: Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries),
|
: Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries),
|
||||||
InfoObj(InfoObj) {}
|
InfoObj(InfoObj) {}
|
||||||
@ -416,7 +424,8 @@ public:
|
|||||||
}
|
}
|
||||||
Ptr += sizeof(hash_value_type); // Skip the hash.
|
Ptr += sizeof(hash_value_type); // Skip the hash.
|
||||||
// Determine the length of the key and the data.
|
// Determine the length of the key and the data.
|
||||||
const std::pair<unsigned, unsigned> &L = Info::ReadKeyDataLength(Ptr);
|
const std::pair<offset_type, offset_type> &L =
|
||||||
|
Info::ReadKeyDataLength(Ptr);
|
||||||
Ptr += L.first + L.second;
|
Ptr += L.first + L.second;
|
||||||
assert(NumItemsInBucketLeft);
|
assert(NumItemsInBucketLeft);
|
||||||
--NumItemsInBucketLeft;
|
--NumItemsInBucketLeft;
|
||||||
@ -435,7 +444,7 @@ public:
|
|||||||
LocalPtr += sizeof(hash_value_type); // Skip the hash.
|
LocalPtr += sizeof(hash_value_type); // Skip the hash.
|
||||||
|
|
||||||
// Determine the length of the key and the data.
|
// Determine the length of the key and the data.
|
||||||
const std::pair<unsigned, unsigned> &L =
|
const std::pair<offset_type, offset_type> &L =
|
||||||
Info::ReadKeyDataLength(LocalPtr);
|
Info::ReadKeyDataLength(LocalPtr);
|
||||||
|
|
||||||
// Read the key.
|
// Read the key.
|
||||||
@ -456,14 +465,14 @@ public:
|
|||||||
/// \brief Iterates over all the entries in the table, returning the data.
|
/// \brief Iterates over all the entries in the table, returning the data.
|
||||||
class data_iterator {
|
class data_iterator {
|
||||||
const unsigned char *Ptr;
|
const unsigned char *Ptr;
|
||||||
unsigned NumItemsInBucketLeft;
|
offset_type NumItemsInBucketLeft;
|
||||||
unsigned NumEntriesLeft;
|
offset_type NumEntriesLeft;
|
||||||
Info *InfoObj;
|
Info *InfoObj;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef data_type value_type;
|
typedef data_type value_type;
|
||||||
|
|
||||||
data_iterator(const unsigned char *const Ptr, unsigned NumEntries,
|
data_iterator(const unsigned char *const Ptr, offset_type NumEntries,
|
||||||
Info *InfoObj)
|
Info *InfoObj)
|
||||||
: Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries),
|
: Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries),
|
||||||
InfoObj(InfoObj) {}
|
InfoObj(InfoObj) {}
|
||||||
@ -487,7 +496,8 @@ public:
|
|||||||
}
|
}
|
||||||
Ptr += sizeof(hash_value_type); // Skip the hash.
|
Ptr += sizeof(hash_value_type); // Skip the hash.
|
||||||
// Determine the length of the key and the data.
|
// Determine the length of the key and the data.
|
||||||
const std::pair<unsigned, unsigned> &L = Info::ReadKeyDataLength(Ptr);
|
const std::pair<offset_type, offset_type> &L =
|
||||||
|
Info::ReadKeyDataLength(Ptr);
|
||||||
Ptr += L.first + L.second;
|
Ptr += L.first + L.second;
|
||||||
assert(NumItemsInBucketLeft);
|
assert(NumItemsInBucketLeft);
|
||||||
--NumItemsInBucketLeft;
|
--NumItemsInBucketLeft;
|
||||||
@ -506,7 +516,7 @@ public:
|
|||||||
LocalPtr += sizeof(hash_value_type); // Skip the hash.
|
LocalPtr += sizeof(hash_value_type); // Skip the hash.
|
||||||
|
|
||||||
// Determine the length of the key and the data.
|
// Determine the length of the key and the data.
|
||||||
const std::pair<unsigned, unsigned> &L =
|
const std::pair<offset_type, offset_type> &L =
|
||||||
Info::ReadKeyDataLength(LocalPtr);
|
Info::ReadKeyDataLength(LocalPtr);
|
||||||
|
|
||||||
// Read the key.
|
// Read the key.
|
||||||
@ -545,8 +555,10 @@ public:
|
|||||||
assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
|
assert((reinterpret_cast<uintptr_t>(Buckets) & 0x3) == 0 &&
|
||||||
"buckets should be 4-byte aligned.");
|
"buckets should be 4-byte aligned.");
|
||||||
|
|
||||||
unsigned NumBuckets = endian::readNext<uint32_t, little, aligned>(Buckets);
|
offset_type NumBuckets =
|
||||||
unsigned NumEntries = endian::readNext<uint32_t, little, aligned>(Buckets);
|
endian::readNext<offset_type, little, aligned>(Buckets);
|
||||||
|
offset_type NumEntries =
|
||||||
|
endian::readNext<offset_type, little, aligned>(Buckets);
|
||||||
return new OnDiskIterableChainedHashTable<Info>(
|
return new OnDiskIterableChainedHashTable<Info>(
|
||||||
NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj);
|
NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user