mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-29 16:41:27 +00:00
[RewriteStatepointsForGC] Reduce the number of new instructions for base pointers
When computing base pointers, we introduce new instructions to propagate the base of existing instructions which might not be bases. However, the algorithm doesn't make any effort to recognize when the new instruction to be inserted is the same as an existing one already in the IR. Since this is happening immediately before rewriting, we don't really have a chance to fix it after the pass runs without teaching loop passes about statepoints. I'm really not thrilled with this patch. I've rewritten it 4 different ways now, but this is the best I've come up with. The case where the new instruction is just the original base defining value could be merged into the existing algorithm with some complexity. The problem is that we might have something like an extractelement from a phi of two vectors. It may be trivially obvious that the base of the 0th element is an existing instruction, but I can't see how to make the algorithm itself figure that out. Thus, I resort to the call to SimplifyInstruction instead. Note that we can only adjust the instructions we've inserted ourselves. The live sets are still being tracked in side structures at this point in the code. We can't easily muck with instructions which might be in them. Long term, I'm really thinking we need to materialize the live pointer sets explicitly in the IR somehow rather than using side structures to track them. Differential Revision: http://reviews.llvm.org/D12004 llvm-svn: 246133
This commit is contained in:
parent
e0f400feaa
commit
abcdc5e3a8
@ -14,6 +14,7 @@
|
||||
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Analysis/CFG.h"
|
||||
#include "llvm/Analysis/InstructionSimplify.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
#include "llvm/ADT/SetOperations.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
@ -1009,6 +1010,62 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
|
||||
}
|
||||
}
|
||||
|
||||
// Now that we're done with the algorithm, see if we can optimize the
|
||||
// results slightly by reducing the number of new instructions needed.
|
||||
// Arguably, this should be integrated into the algorithm above, but
|
||||
// doing as a post process step is easier to reason about for the moment.
|
||||
DenseMap<Value *, Value *> ReverseMap;
|
||||
SmallPtrSet<Instruction *, 16> NewInsts;
|
||||
SmallSetVector<Instruction *, 16> Worklist;
|
||||
for (auto Item : states) {
|
||||
Value *V = Item.first;
|
||||
Value *Base = Item.second.getBase();
|
||||
assert(V && Base);
|
||||
assert(!isKnownBaseResult(V) && "why did it get added?");
|
||||
assert(isKnownBaseResult(Base) &&
|
||||
"must be something we 'know' is a base pointer");
|
||||
if (!Item.second.isConflict())
|
||||
continue;
|
||||
|
||||
ReverseMap[Base] = V;
|
||||
if (auto *BaseI = dyn_cast<Instruction>(Base)) {
|
||||
NewInsts.insert(BaseI);
|
||||
Worklist.insert(BaseI);
|
||||
}
|
||||
}
|
||||
auto PushNewUsers = [&](Instruction *I) {
|
||||
for (User *U : I->users())
|
||||
if (auto *UI = dyn_cast<Instruction>(U))
|
||||
if (NewInsts.count(UI))
|
||||
Worklist.insert(UI);
|
||||
};
|
||||
const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout();
|
||||
while (!Worklist.empty()) {
|
||||
Instruction *BaseI = Worklist.pop_back_val();
|
||||
Value *Bdv = ReverseMap[BaseI];
|
||||
if (auto *BdvI = dyn_cast<Instruction>(Bdv))
|
||||
if (BaseI->isIdenticalTo(BdvI)) {
|
||||
DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n");
|
||||
PushNewUsers(BaseI);
|
||||
BaseI->replaceAllUsesWith(Bdv);
|
||||
BaseI->eraseFromParent();
|
||||
states[Bdv] = BDVState(BDVState::Conflict, Bdv);
|
||||
NewInsts.erase(BaseI);
|
||||
ReverseMap.erase(BaseI);
|
||||
continue;
|
||||
}
|
||||
if (Value *V = SimplifyInstruction(BaseI, DL)) {
|
||||
DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n");
|
||||
PushNewUsers(BaseI);
|
||||
BaseI->replaceAllUsesWith(V);
|
||||
BaseI->eraseFromParent();
|
||||
states[Bdv] = BDVState(BDVState::Conflict, V);
|
||||
NewInsts.erase(BaseI);
|
||||
ReverseMap.erase(BaseI);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Cache all of our results so we can cheaply reuse them
|
||||
// NOTE: This is actually two caches: one of the base defining value
|
||||
// relation and one of the base pointer relation! FIXME
|
||||
@ -1016,7 +1073,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
|
||||
Value *v = item.first;
|
||||
Value *base = item.second.getBase();
|
||||
assert(v && base);
|
||||
assert(!isKnownBaseResult(v) && "why did it get added?");
|
||||
|
||||
if (TraceLSP) {
|
||||
std::string fromstr =
|
||||
@ -1028,8 +1084,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
|
||||
<< " to: " << (base->hasName() ? base->getName() : "") << "\n";
|
||||
}
|
||||
|
||||
assert(isKnownBaseResult(base) &&
|
||||
"must be something we 'know' is a base pointer");
|
||||
if (cache.count(v)) {
|
||||
// Once we transition from the BDV relation being store in the cache to
|
||||
// the base relation being stored, it must be stable
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
|
||||
|
||||
|
||||
; CHECK: derived %obj_to_consume base %obj_to_consume.base
|
||||
; CHECK: derived %obj_to_consume base %obj_to_consume
|
||||
|
||||
declare void @foo()
|
||||
declare i64 addrspace(1)* @generate_obj()
|
||||
@ -33,7 +33,6 @@ dest_c:
|
||||
|
||||
merge:
|
||||
; CHECK: merge:
|
||||
; CHECK: %obj_to_consume.base = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
|
||||
; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
|
||||
|
||||
%obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
|
||||
|
@ -80,7 +80,6 @@ loop: ; preds = %loop, %entry
|
||||
; we'd have commoned these, but that's a missed optimization, not correctness.
|
||||
; CHECK-DAG: [ [[DISCARD:%.*.base.relocated.casted]], %loop ]
|
||||
; CHECK-NOT: extra.base
|
||||
; CHECK: next.base = select
|
||||
; CHECK: next = select
|
||||
; CHECK: extra2.base = select
|
||||
; CHECK: extra2 = select
|
||||
@ -95,6 +94,24 @@ loop: ; preds = %loop, %entry
|
||||
br label %loop
|
||||
}
|
||||
|
||||
define i64 addrspace(1)* @test3(i1 %cnd, i64 addrspace(1)* %obj,
|
||||
i64 addrspace(1)* %obj2)
|
||||
gc "statepoint-example" {
|
||||
; CHECK-LABEL: @test3
|
||||
entry:
|
||||
br i1 %cnd, label %merge, label %taken
|
||||
taken:
|
||||
br label %merge
|
||||
merge:
|
||||
; CHECK-LABEL: merge:
|
||||
; CHECK-NEXT: %bdv = phi
|
||||
; CHECK-NEXT: gc.statepoint
|
||||
%bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %taken ]
|
||||
%safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
|
||||
ret i64 addrspace(1)* %bdv
|
||||
}
|
||||
|
||||
|
||||
declare void @foo()
|
||||
declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
|
||||
declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
|
||||
|
@ -40,13 +40,10 @@ untaken2:
|
||||
br label %merge2
|
||||
merge2:
|
||||
; CHECK-LABEL: merge2:
|
||||
; CHECK: %obj.base = phi i64 addrspace(1)*
|
||||
; CHECK: %obj = phi i64 addrspace(1)*
|
||||
; CHECK: statepoint
|
||||
; CHECK-NEXT: %obj = phi i64 addrspace(1)*
|
||||
; CHECK-NEXT: statepoint
|
||||
; CHECK: gc.relocate
|
||||
; CHECK-DAG: ; (%obj.base, %obj)
|
||||
; CHECK: gc.relocate
|
||||
; CHECK-DAG: ; (%obj.base, %obj.base)
|
||||
; CHECK-DAG: ; (%obj, %obj)
|
||||
%obj = phi i64 addrspace(1)* [%obj0, %taken2], [%obj1, %untaken2]
|
||||
%safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
|
||||
ret i64 addrspace(1)* %obj
|
||||
|
@ -121,9 +121,6 @@ define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr)
|
||||
; CHECK-LABEL: test6
|
||||
; CHECK-LABEL: merge:
|
||||
; CHECK-NEXT: = phi
|
||||
; CHECK-NEXT: = phi
|
||||
; CHECK-NEXT: extractelement
|
||||
; CHECK-NEXT: extractelement
|
||||
; CHECK-NEXT: extractelement
|
||||
; CHECK-NEXT: extractelement
|
||||
; CHECK-NEXT: gc.statepoint
|
||||
@ -131,12 +128,6 @@ define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr)
|
||||
; CHECK-NEXT: bitcast
|
||||
; CHECK-NEXT: gc.relocate
|
||||
; CHECK-NEXT: bitcast
|
||||
; CHECK-NEXT: gc.relocate
|
||||
; CHECK-NEXT: bitcast
|
||||
; CHECK-NEXT: gc.relocate
|
||||
; CHECK-NEXT: bitcast
|
||||
; CHECK-NEXT: insertelement
|
||||
; CHECK-NEXT: insertelement
|
||||
; CHECK-NEXT: insertelement
|
||||
; CHECK-NEXT: insertelement
|
||||
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
|
||||
|
Loading…
Reference in New Issue
Block a user