mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-26 05:18:46 +00:00
Revert "[NFCi][MergeFunctions] Consolidate Hashing Functions"
This reverts commit 28134a29fdedd8972acdfb39223571ddcc15dc59. This patch was causing build failures on multiple buildbots on 32-bit architectures. Reverting now so I can debug out-of-trunk and resubmit later.
This commit is contained in:
parent
b41e75c8a4
commit
7ff7df1c62
@ -21,10 +21,8 @@ namespace llvm {
|
||||
class Function;
|
||||
class Module;
|
||||
|
||||
using IRHash = uint64_t;
|
||||
|
||||
IRHash StructuralHash(const Function &F);
|
||||
IRHash StructuralHash(const Module &M);
|
||||
uint64_t StructuralHash(const Function &F);
|
||||
uint64_t StructuralHash(const Module &M);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
|
@ -99,6 +99,11 @@ public:
|
||||
/// Test whether the two functions have equivalent behaviour.
|
||||
int compare();
|
||||
|
||||
/// Hash a function. Equivalent functions will have the same hash, and unequal
|
||||
/// functions will have different hashes with high probability.
|
||||
using FunctionHash = uint64_t;
|
||||
static FunctionHash functionHash(Function &);
|
||||
|
||||
protected:
|
||||
/// Start the comparison.
|
||||
void beginCompare() {
|
||||
|
@ -27,28 +27,12 @@ class StructuralHashImpl {
|
||||
public:
|
||||
StructuralHashImpl() : Hash(4) {}
|
||||
|
||||
// A function hash is calculated by considering only the number of arguments
|
||||
// and whether a function is varargs, the order of basic blocks (given by the
|
||||
// successors of each basic block in depth first order), and the order of
|
||||
// opcodes of each instruction within each of these basic blocks. This mirrors
|
||||
// the strategy FunctionComparator::compare() uses to compare functions by
|
||||
// walking the BBs in depth first order and comparing each instruction in
|
||||
// sequence. Because this hash currently does not look at the operands, it is
|
||||
// insensitive to things such as the target of calls and the constants used in
|
||||
// the function, which makes it useful when possibly merging functions which
|
||||
// are the same modulo constants and call targets.
|
||||
//
|
||||
// Note that different users of StructuralHash will want different behavior
|
||||
// out of it (i.e., MergeFunctions will want something different from PM
|
||||
// expensive checks for pass modification status). When modifying this
|
||||
// function, most changes should be gated behind an option and enabled
|
||||
// selectively.
|
||||
void update(const Function &F) {
|
||||
// Declarations don't affect analyses.
|
||||
if (F.isDeclaration())
|
||||
return;
|
||||
|
||||
hash(0x6acaa36bef8325c5ULL); // Function header
|
||||
hash(12345); // Function header
|
||||
|
||||
hash(F.isVarArg());
|
||||
hash(F.arg_size());
|
||||
@ -56,18 +40,11 @@ public:
|
||||
SmallVector<const BasicBlock *, 8> BBs;
|
||||
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
|
||||
|
||||
// Walk the blocks in the same order as
|
||||
// FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
|
||||
// function "structure." (BB and opcode sequence)
|
||||
BBs.push_back(&F.getEntryBlock());
|
||||
VisitedBBs.insert(BBs[0]);
|
||||
while (!BBs.empty()) {
|
||||
const BasicBlock *BB = BBs.pop_back_val();
|
||||
|
||||
// This random value acts as a block header, as otherwise the partition of
|
||||
// opcodes into BBs wouldn't affect the hash, only the order of the
|
||||
// opcodes
|
||||
hash(45798);
|
||||
hash(45798); // Block header
|
||||
for (auto &Inst : *BB)
|
||||
hash(Inst.getOpcode());
|
||||
|
||||
@ -102,13 +79,13 @@ public:
|
||||
|
||||
} // namespace
|
||||
|
||||
IRHash llvm::StructuralHash(const Function &F) {
|
||||
uint64_t llvm::StructuralHash(const Function &F) {
|
||||
StructuralHashImpl H;
|
||||
H.update(F);
|
||||
return H.getHash();
|
||||
}
|
||||
|
||||
IRHash llvm::StructuralHash(const Module &M) {
|
||||
uint64_t llvm::StructuralHash(const Module &M) {
|
||||
StructuralHashImpl H;
|
||||
H.update(M);
|
||||
return H.getHash();
|
||||
|
@ -107,7 +107,6 @@
|
||||
#include "llvm/IR/Instructions.h"
|
||||
#include "llvm/IR/IntrinsicInst.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/StructuralHash.h"
|
||||
#include "llvm/IR/Type.h"
|
||||
#include "llvm/IR/Use.h"
|
||||
#include "llvm/IR/User.h"
|
||||
@ -172,14 +171,15 @@ namespace {
|
||||
|
||||
class FunctionNode {
|
||||
mutable AssertingVH<Function> F;
|
||||
IRHash Hash;
|
||||
FunctionComparator::FunctionHash Hash;
|
||||
|
||||
public:
|
||||
// Note the hash is recalculated potentially multiple times, but it is cheap.
|
||||
FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
|
||||
FunctionNode(Function *F)
|
||||
: F(F), Hash(FunctionComparator::functionHash(*F)) {}
|
||||
|
||||
Function *getFunc() const { return F; }
|
||||
IRHash getHash() const { return Hash; }
|
||||
FunctionComparator::FunctionHash getHash() const { return Hash; }
|
||||
|
||||
/// Replace the reference to the function F by the function G, assuming their
|
||||
/// implementations are equal.
|
||||
@ -390,10 +390,11 @@ bool MergeFunctions::runOnModule(Module &M) {
|
||||
|
||||
// All functions in the module, ordered by hash. Functions with a unique
|
||||
// hash value are easily eliminated.
|
||||
std::vector<std::pair<IRHash, Function *>> HashedFuncs;
|
||||
std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
|
||||
HashedFuncs;
|
||||
for (Function &Func : M) {
|
||||
if (isEligibleForMerging(Func)) {
|
||||
HashedFuncs.push_back({StructuralHash(Func), &Func});
|
||||
HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -958,3 +958,67 @@ int FunctionComparator::compare() {
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
|
||||
// hash of a sequence of 64bit ints, but the entire input does not need to be
|
||||
// available at once. This interface is necessary for functionHash because it
|
||||
// needs to accumulate the hash as the structure of the function is traversed
|
||||
// without saving these values to an intermediate buffer. This form of hashing
|
||||
// is not often needed, as usually the object to hash is just read from a
|
||||
// buffer.
|
||||
class HashAccumulator64 {
|
||||
uint64_t Hash;
|
||||
|
||||
public:
|
||||
// Initialize to random constant, so the state isn't zero.
|
||||
HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
|
||||
|
||||
void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
|
||||
|
||||
// No finishing is required, because the entire hash value is used.
|
||||
uint64_t getHash() { return Hash; }
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
||||
// A function hash is calculated by considering only the number of arguments and
|
||||
// whether a function is varargs, the order of basic blocks (given by the
|
||||
// successors of each basic block in depth first order), and the order of
|
||||
// opcodes of each instruction within each of these basic blocks. This mirrors
|
||||
// the strategy compare() uses to compare functions by walking the BBs in depth
|
||||
// first order and comparing each instruction in sequence. Because this hash
|
||||
// does not look at the operands, it is insensitive to things such as the
|
||||
// target of calls and the constants used in the function, which makes it useful
|
||||
// when possibly merging functions which are the same modulo constants and call
|
||||
// targets.
|
||||
FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
|
||||
HashAccumulator64 H;
|
||||
H.add(F.isVarArg());
|
||||
H.add(F.arg_size());
|
||||
|
||||
SmallVector<const BasicBlock *, 8> BBs;
|
||||
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
|
||||
|
||||
// Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
|
||||
// accumulating the hash of the function "structure." (BB and opcode sequence)
|
||||
BBs.push_back(&F.getEntryBlock());
|
||||
VisitedBBs.insert(BBs[0]);
|
||||
while (!BBs.empty()) {
|
||||
const BasicBlock *BB = BBs.pop_back_val();
|
||||
// This random value acts as a block header, as otherwise the partition of
|
||||
// opcodes into BBs wouldn't affect the hash, only the order of the opcodes
|
||||
H.add(45798);
|
||||
for (const auto &Inst : *BB) {
|
||||
H.add(Inst.getOpcode());
|
||||
}
|
||||
const Instruction *Term = BB->getTerminator();
|
||||
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
|
||||
if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
|
||||
continue;
|
||||
BBs.push_back(Term->getSuccessor(i));
|
||||
}
|
||||
}
|
||||
return H.getHash();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user