[NFCi][MergeFunctions] Consolidate Hashing Functions

A couple years ago, StructuralHash was created, copying the exact
hashing implementation from FunctionComparator (minus a couple small
details/refactorings). Since then, the hashing implementation has not
diverged, but several other areas, like unit testing, have diverged
significantly, with StructuralHash getting more attention in these
areas. This patch aims to consolidate the two hashing functions into
StructuralHash given they do the exact same thing and having less
divergence in areas like unit testing would be beneficial.

The original aim at creating a separate StructuralHash was to make the
implementation divergent and capture additional details like instruction
operands (which neither hashing implementation does currently). The
MergeFunctions pass doesn't need these detaisl, but verification of pass
return values would benefit from this additional data. Setting an option
to calculate these values would allow for divergent behavior where
appropriate while reducing code duplication with little runtime
overhead.

Reviewed By: aeubanks

Differential Revision: https://reviews.llvm.org/D158217
This commit is contained in:
Aiden Grossman 2023-08-17 13:57:51 -07:00
parent 4da76ea70a
commit 28134a29fd
5 changed files with 37 additions and 82 deletions

View File

@ -21,8 +21,10 @@ namespace llvm {
class Function;
class Module;
uint64_t StructuralHash(const Function &F);
uint64_t StructuralHash(const Module &M);
using IRHash = uint64_t;
IRHash StructuralHash(const Function &F);
IRHash StructuralHash(const Module &M);
} // end namespace llvm

View File

@ -99,11 +99,6 @@ public:
/// Test whether the two functions have equivalent behaviour.
int compare();
/// Hash a function. Equivalent functions will have the same hash, and unequal
/// functions will have different hashes with high probability.
using FunctionHash = uint64_t;
static FunctionHash functionHash(Function &);
protected:
/// Start the comparison.
void beginCompare() {

View File

@ -27,12 +27,28 @@ class StructuralHashImpl {
public:
StructuralHashImpl() : Hash(4) {}
// A function hash is calculated by considering only the number of arguments
// and whether a function is varargs, the order of basic blocks (given by the
// successors of each basic block in depth first order), and the order of
// opcodes of each instruction within each of these basic blocks. This mirrors
// the strategy FunctionComparator::compare() uses to compare functions by
// walking the BBs in depth first order and comparing each instruction in
// sequence. Because this hash currently does not look at the operands, it is
// insensitive to things such as the target of calls and the constants used in
// the function, which makes it useful when possibly merging functions which
// are the same modulo constants and call targets.
//
// Note that different users of StructuralHash will want different behavior
// out of it (i.e., MergeFunctions will want something different from PM
// expensive checks for pass modification status). When modifying this
// function, most changes should be gated behind an option and enabled
// selectively.
void update(const Function &F) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;
hash(12345); // Function header
hash(0x6acaa36bef8325c5ULL); // Function header
hash(F.isVarArg());
hash(F.arg_size());
@ -40,11 +56,18 @@ public:
SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
// Walk the blocks in the same order as
// FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
// function "structure." (BB and opcode sequence)
BBs.push_back(&F.getEntryBlock());
VisitedBBs.insert(BBs[0]);
while (!BBs.empty()) {
const BasicBlock *BB = BBs.pop_back_val();
hash(45798); // Block header
// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the
// opcodes
hash(45798);
for (auto &Inst : *BB)
hash(Inst.getOpcode());
@ -79,13 +102,13 @@ public:
} // namespace
uint64_t llvm::StructuralHash(const Function &F) {
IRHash llvm::StructuralHash(const Function &F) {
StructuralHashImpl H;
H.update(F);
return H.getHash();
}
uint64_t llvm::StructuralHash(const Module &M) {
IRHash llvm::StructuralHash(const Module &M) {
StructuralHashImpl H;
H.update(M);
return H.getHash();

View File

@ -107,6 +107,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
@ -171,15 +172,14 @@ namespace {
class FunctionNode {
mutable AssertingVH<Function> F;
FunctionComparator::FunctionHash Hash;
IRHash Hash;
public:
// Note the hash is recalculated potentially multiple times, but it is cheap.
FunctionNode(Function *F)
: F(F), Hash(FunctionComparator::functionHash(*F)) {}
FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
Function *getFunc() const { return F; }
FunctionComparator::FunctionHash getHash() const { return Hash; }
IRHash getHash() const { return Hash; }
/// Replace the reference to the function F by the function G, assuming their
/// implementations are equal.
@ -390,11 +390,10 @@ bool MergeFunctions::runOnModule(Module &M) {
// All functions in the module, ordered by hash. Functions with a unique
// hash value are easily eliminated.
std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
HashedFuncs;
std::vector<std::pair<IRHash, Function *>> HashedFuncs;
for (Function &Func : M) {
if (isEligibleForMerging(Func)) {
HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
HashedFuncs.push_back({StructuralHash(Func), &Func});
}
}

View File

@ -958,67 +958,3 @@ int FunctionComparator::compare() {
}
return 0;
}
namespace {
// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
// hash of a sequence of 64bit ints, but the entire input does not need to be
// available at once. This interface is necessary for functionHash because it
// needs to accumulate the hash as the structure of the function is traversed
// without saving these values to an intermediate buffer. This form of hashing
// is not often needed, as usually the object to hash is just read from a
// buffer.
class HashAccumulator64 {
uint64_t Hash;
public:
// Initialize to random constant, so the state isn't zero.
HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
// No finishing is required, because the entire hash value is used.
uint64_t getHash() { return Hash; }
};
} // end anonymous namespace
// A function hash is calculated by considering only the number of arguments and
// whether a function is varargs, the order of basic blocks (given by the
// successors of each basic block in depth first order), and the order of
// opcodes of each instruction within each of these basic blocks. This mirrors
// the strategy compare() uses to compare functions by walking the BBs in depth
// first order and comparing each instruction in sequence. Because this hash
// does not look at the operands, it is insensitive to things such as the
// target of calls and the constants used in the function, which makes it useful
// when possibly merging functions which are the same modulo constants and call
// targets.
FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
HashAccumulator64 H;
H.add(F.isVarArg());
H.add(F.arg_size());
SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
// Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
// accumulating the hash of the function "structure." (BB and opcode sequence)
BBs.push_back(&F.getEntryBlock());
VisitedBBs.insert(BBs[0]);
while (!BBs.empty()) {
const BasicBlock *BB = BBs.pop_back_val();
// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the opcodes
H.add(45798);
for (const auto &Inst : *BB) {
H.add(Inst.getOpcode());
}
const Instruction *Term = BB->getTerminator();
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
continue;
BBs.push_back(Term->getSuccessor(i));
}
}
return H.getHash();
}