[lld-macho] Switch to xxh3_64bits

xxh3 is substantially faster than xxh64.
For lld/ELF, there is a substantial speedup in `.debug_str` duplicate
elimination (D154813). Use xxh3 for lld-macho as well.

Reviewed By: #lld-macho, oontvoo

Differential Revision: https://reviews.llvm.org/D155677
This commit is contained in:
Fangrui Song 2023-07-19 09:58:43 -07:00
parent 0ea11835d8
commit 2090d66b23
4 changed files with 7 additions and 7 deletions

View File

@ -457,7 +457,7 @@ void macho::foldIdenticalSections(bool onlyCfStrings) {
assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
// Turn-on the top bit to guarantee that valid hashes have no collisions
// with the small-integer unique IDs for ICF-ineligible sections
isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 31);
isec->icfEqClass[0] = xxh3_64bits(isec->data) | (1ull << 31);
});
// Now that every input section is either hashed or marked as unique, run the
// segregation algorithm to detect foldable subsections.

View File

@ -246,7 +246,7 @@ void CStringInputSection::splitIntoPieces() {
size_t end = s.find(0);
if (end == StringRef::npos)
fatal(getLocation(off) + ": string is not null terminated");
uint32_t hash = deduplicateLiterals ? xxHash64(s.take_front(end)) : 0;
uint32_t hash = deduplicateLiterals ? xxh3_64bits(s.take_front(end)) : 0;
pieces.emplace_back(off, hash);
size_t size = end + 1; // include null terminator
s = s.substr(size);

View File

@ -1677,7 +1677,7 @@ void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
DeduplicatedCStringSection::StringOffset
DeduplicatedCStringSection::getStringOffset(StringRef str) const {
// StringPiece uses 31 bits to store the hashes, so we replicate that
uint32_t hash = xxHash64(str) & 0x7fffffff;
uint32_t hash = xxh3_64bits(str) & 0x7fffffff;
auto offset = stringOffsetMap.find(CachedHashStringRef(str, hash));
assert(offset != stringOffsetMap.end() &&
"Looked-up strings should always exist in section");

View File

@ -1190,14 +1190,14 @@ void Writer::writeUuid() {
threadFutures.reserve(chunks.size());
for (size_t i = 0; i < chunks.size(); ++i)
threadFutures.emplace_back(threadPool.async(
[&](size_t j) { hashes[j] = xxHash64(chunks[j]); }, i));
[&](size_t j) { hashes[j] = xxh3_64bits(chunks[j]); }, i));
for (std::shared_future<void> &future : threadFutures)
future.wait();
// Append the output filename so that identical binaries with different names
// don't get the same UUID.
hashes[chunks.size()] = xxHash64(sys::path::filename(config->finalOutput));
uint64_t digest = xxHash64({reinterpret_cast<uint8_t *>(hashes.data()),
hashes.size() * sizeof(uint64_t)});
hashes[chunks.size()] = xxh3_64bits(sys::path::filename(config->finalOutput));
uint64_t digest = xxh3_64bits({reinterpret_cast<uint8_t *>(hashes.data()),
hashes.size() * sizeof(uint64_t)});
uuidCommand->writeUuid(digest);
}