Revert "[NFCi][MergeFunctions] Consolidate Hashing Functions"

This reverts commit 28134a29fdedd8972acdfb39223571ddcc15dc59. This patch was causing build failures on multiple buildbots on 32-bit architectures. Reverting now so I can deboug out-of-trunk and resubmit later.
2025-02-26 05:18:46 +00:00 · 2023-08-19 12:23:16 -07:00 · 2023-08-19 12:23:16 -07:00 · 7ff7df1c62
commit 7ff7df1c62
parent b41e75c8a4
5 changed files with 82 additions and 37 deletions
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@ -21,10 +21,8 @@ namespace llvm {
 class Function;
 class Module;

-using IRHash = uint64_t;
-
-IRHash StructuralHash(const Function &F);
-IRHash StructuralHash(const Module &M);
+uint64_t StructuralHash(const Function &F);
+uint64_t StructuralHash(const Module &M);

 } // end namespace llvm

--- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
@ -99,6 +99,11 @@ public:
  /// Test whether the two functions have equivalent behaviour.
  int compare();

+  /// Hash a function. Equivalent functions will have the same hash, and unequal
+  /// functions will have different hashes with high probability.
+  using FunctionHash = uint64_t;
+  static FunctionHash functionHash(Function &);
+
 protected:
  /// Start the comparison.
  void beginCompare() {
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@ -27,28 +27,12 @@ class StructuralHashImpl {
 public:
  StructuralHashImpl() : Hash(4) {}

-  // A function hash is calculated by considering only the number of arguments
-  // and whether a function is varargs, the order of basic blocks (given by the
-  // successors of each basic block in depth first order), and the order of
-  // opcodes of each instruction within each of these basic blocks. This mirrors
-  // the strategy FunctionComparator::compare() uses to compare functions by
-  // walking the BBs in depth first order and comparing each instruction in
-  // sequence. Because this hash currently does not look at the operands, it is
-  // insensitive to things such as the target of calls and the constants used in
-  // the function, which makes it useful when possibly merging functions which
-  // are the same modulo constants and call targets.
-  //
-  // Note that different users of StructuralHash will want different behavior
-  // out of it (i.e., MergeFunctions will want something different from PM
-  // expensive checks for pass modification status). When modifying this
-  // function, most changes should be gated behind an option and enabled
-  // selectively.
  void update(const Function &F) {
    // Declarations don't affect analyses.
    if (F.isDeclaration())
      return;

-    hash(0x6acaa36bef8325c5ULL); // Function header
+    hash(12345); // Function header

    hash(F.isVarArg());
    hash(F.arg_size());
@ -56,18 +40,11 @@ public:
    SmallVector<const BasicBlock *, 8> BBs;
    SmallPtrSet<const BasicBlock *, 16> VisitedBBs;

-    // Walk the blocks in the same order as
-    // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
-    // function "structure." (BB and opcode sequence)
    BBs.push_back(&F.getEntryBlock());
    VisitedBBs.insert(BBs[0]);
    while (!BBs.empty()) {
      const BasicBlock *BB = BBs.pop_back_val();
-
-      // This random value acts as a block header, as otherwise the partition of
-      // opcodes into BBs wouldn't affect the hash, only the order of the
-      // opcodes
-      hash(45798);
+      hash(45798); // Block header
      for (auto &Inst : *BB)
        hash(Inst.getOpcode());

@ -102,13 +79,13 @@ public:

 } // namespace

-IRHash llvm::StructuralHash(const Function &F) {
+uint64_t llvm::StructuralHash(const Function &F) {
  StructuralHashImpl H;
  H.update(F);
  return H.getHash();
 }

-IRHash llvm::StructuralHash(const Module &M) {
+uint64_t llvm::StructuralHash(const Module &M) {
  StructuralHashImpl H;
  H.update(M);
  return H.getHash();
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@ -107,7 +107,6 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
-#include "llvm/IR/StructuralHash.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/User.h"
@ -172,14 +171,15 @@ namespace {

 class FunctionNode {
  mutable AssertingVH<Function> F;
-  IRHash Hash;
+  FunctionComparator::FunctionHash Hash;

 public:
  // Note the hash is recalculated potentially multiple times, but it is cheap.
-  FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
+  FunctionNode(Function *F)
+    : F(F), Hash(FunctionComparator::functionHash(*F))  {}

  Function *getFunc() const { return F; }
-  IRHash getHash() const { return Hash; }
+  FunctionComparator::FunctionHash getHash() const { return Hash; }

  /// Replace the reference to the function F by the function G, assuming their
  /// implementations are equal.
@ -390,10 +390,11 @@ bool MergeFunctions::runOnModule(Module &M) {

  // All functions in the module, ordered by hash. Functions with a unique
  // hash value are easily eliminated.
-  std::vector<std::pair<IRHash, Function *>> HashedFuncs;
+  std::vector<std::pair<FunctionComparator::FunctionHash, Function *>>
+    HashedFuncs;
  for (Function &Func : M) {
    if (isEligibleForMerging(Func)) {
-      HashedFuncs.push_back({StructuralHash(Func), &Func});
+      HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func});
    }
  }

--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@ -958,3 +958,67 @@ int FunctionComparator::compare() {
  }
  return 0;
 }
+
+namespace {
+
+// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
+// hash of a sequence of 64bit ints, but the entire input does not need to be
+// available at once. This interface is necessary for functionHash because it
+// needs to accumulate the hash as the structure of the function is traversed
+// without saving these values to an intermediate buffer. This form of hashing
+// is not often needed, as usually the object to hash is just read from a
+// buffer.
+class HashAccumulator64 {
+  uint64_t Hash;
+
+public:
+  // Initialize to random constant, so the state isn't zero.
+  HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
+
+  void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+
+  // No finishing is required, because the entire hash value is used.
+  uint64_t getHash() { return Hash; }
+};
+
+} // end anonymous namespace
+
+// A function hash is calculated by considering only the number of arguments and
+// whether a function is varargs, the order of basic blocks (given by the
+// successors of each basic block in depth first order), and the order of
+// opcodes of each instruction within each of these basic blocks. This mirrors
+// the strategy compare() uses to compare functions by walking the BBs in depth
+// first order and comparing each instruction in sequence. Because this hash
+// does not look at the operands, it is insensitive to things such as the
+// target of calls and the constants used in the function, which makes it useful
+// when possibly merging functions which are the same modulo constants and call
+// targets.
+FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
+  HashAccumulator64 H;
+  H.add(F.isVarArg());
+  H.add(F.arg_size());
+
+  SmallVector<const BasicBlock *, 8> BBs;
+  SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
+
+  // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
+  // accumulating the hash of the function "structure." (BB and opcode sequence)
+  BBs.push_back(&F.getEntryBlock());
+  VisitedBBs.insert(BBs[0]);
+  while (!BBs.empty()) {
+    const BasicBlock *BB = BBs.pop_back_val();
+    // This random value acts as a block header, as otherwise the partition of
+    // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
+    H.add(45798);
+    for (const auto &Inst : *BB) {
+      H.add(Inst.getOpcode());
+    }
+    const Instruction *Term = BB->getTerminator();
+    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+        continue;
+      BBs.push_back(Term->getSuccessor(i));
+    }
+  }
+  return H.getHash();
+}