mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-02-12 21:28:48 +00:00
[SROA] Propagate !range metadata when moving loads.
This tries to propagate !range metadata to a pre-existing load when a load is optimized out. This is done instead of adding an assume because converting loads to and from assumes creates a lot of IR. Patch by Ariel Ben-Yehuda. Differential Revision: https://reviews.llvm.org/D37216 llvm-svn: 319096
This commit is contained in:
parent
2072552360
commit
b5d59e73ee
@ -2455,15 +2455,10 @@ private:
|
||||
// are different types, for example by mapping !nonnull metadata to
|
||||
// !range metadata by modeling the null pointer constant converted to the
|
||||
// integer type.
|
||||
// FIXME: Add support for range metadata here. Currently the utilities
|
||||
// for this don't propagate range metadata in trivial cases from one
|
||||
// integer load to another, don't handle non-addrspace-0 null pointers
|
||||
// correctly, and don't have any support for mapping ranges as the
|
||||
// integer type becomes winder or narrower.
|
||||
if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
|
||||
copyNonnullMetadata(LI, N, *NewLI);
|
||||
|
||||
// Try to preserve nonnull metadata
|
||||
if (MDNode *N = LI.getMetadata(LLVMContext::MD_range))
|
||||
copyRangeMetadata(DL, LI, N, *NewLI);
|
||||
V = NewLI;
|
||||
|
||||
// If this is an integer load past the end of the slice (which means the
|
||||
@ -3654,7 +3649,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
|
||||
PartPtrTy, BasePtr->getName() + "."),
|
||||
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
|
||||
LI->getName());
|
||||
PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
|
||||
PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
|
||||
|
||||
// Append this load onto the list of split loads so we can find it later
|
||||
// to rewrite the stores.
|
||||
|
@ -1947,18 +1947,24 @@ void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
|
||||
void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
|
||||
MDNode *N, LoadInst &NewLI) {
|
||||
auto *NewTy = NewLI.getType();
|
||||
auto *OldTy = OldLI.getType();
|
||||
|
||||
// Give up unless it is converted to a pointer where there is a single very
|
||||
// valuable mapping we can do reliably.
|
||||
// FIXME: It would be nice to propagate this in more ways, but the type
|
||||
// conversions make it hard.
|
||||
if (!NewTy->isPointerTy())
|
||||
if (DL.getTypeStoreSizeInBits(NewTy) == DL.getTypeSizeInBits(OldTy) &&
|
||||
NewTy->isIntegerTy()) {
|
||||
// An integer with the same number of bits - give it the range
|
||||
// metadata!.
|
||||
NewLI.setMetadata(LLVMContext::MD_range, N);
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
|
||||
if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
|
||||
MDNode *NN = MDNode::get(OldLI.getContext(), None);
|
||||
NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
|
||||
if (NewTy->isPointerTy()) {
|
||||
// Try to convert the !range metadata to !nonnull metadata on the
|
||||
// new pointer.
|
||||
unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
|
||||
if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
|
||||
MDNode *NN = MDNode::get(OldLI.getContext(), None);
|
||||
NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#include "llvm/ADT/ArrayRef.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/Optional.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
@ -48,7 +49,7 @@
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@ -177,6 +178,16 @@ public:
|
||||
ValVector Values;
|
||||
};
|
||||
|
||||
/// \brief Semi-open interval of instructions that are guaranteed to
|
||||
/// all execute if the first one does.
|
||||
class GuaranteedExecutionRange {
|
||||
public:
|
||||
unsigned Start;
|
||||
unsigned End;
|
||||
|
||||
GuaranteedExecutionRange(unsigned S, unsigned E): Start(S), End(E) {}
|
||||
};
|
||||
|
||||
/// \brief This assigns and keeps a per-bb relative ordering of load/store
|
||||
/// instructions in the block that directly load or store an alloca.
|
||||
///
|
||||
@ -190,14 +201,109 @@ class LargeBlockInfo {
|
||||
/// the block.
|
||||
DenseMap<const Instruction *, unsigned> InstNumbers;
|
||||
|
||||
/// \brief For each basic block we track, keep track of the intervals
|
||||
/// of instruction numbers of instructions that transfer control
|
||||
/// to their successors, for propagating metadata.
|
||||
DenseMap<const BasicBlock *, Optional<SmallVector<GuaranteedExecutionRange, 4>>>
|
||||
GuaranteedExecutionIntervals;
|
||||
|
||||
public:
|
||||
|
||||
/// This code only looks at accesses to allocas.
|
||||
/// This code looks for stores to allocas, and for loads both for
|
||||
/// allocas and for transferring metadata.
|
||||
static bool isInterestingInstruction(const Instruction *I) {
|
||||
return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
|
||||
return isa<LoadInst>(I) ||
|
||||
(isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
|
||||
}
|
||||
|
||||
/// Compute the GuaranteedExecutionIntervals for a given BB.
|
||||
///
|
||||
/// This is valid and remains valid as long as each interesting
|
||||
/// instruction (see isInterestingInstruction) that
|
||||
/// A) existed when this LBI was cleared
|
||||
/// B) has not been deleted (deleting interesting instructions is fine)
|
||||
/// are run in the same program executions and in the same order
|
||||
/// as when this LBI was cleared.
|
||||
///
|
||||
/// Because `PromoteMemoryToRegister` does not move memory loads at
|
||||
/// all, this assumption is satisfied in this pass.
|
||||
SmallVector<GuaranteedExecutionRange, 4> computeGEI(const BasicBlock *BB) {
|
||||
SmallVector<GuaranteedExecutionRange, 4> GuaranteedExecutionIntervals;
|
||||
|
||||
unsigned InstNo = 0;
|
||||
bool InRange = false;
|
||||
unsigned FirstInstInRange = 0;
|
||||
for (const Instruction &BBI : *BB) {
|
||||
if (isGuaranteedToTransferExecutionToSuccessor(&BBI)) {
|
||||
if (!InRange && isInterestingInstruction(&BBI)) {
|
||||
InRange = true;
|
||||
FirstInstInRange = InstNo;
|
||||
}
|
||||
} else {
|
||||
if (InRange) {
|
||||
assert(FirstInstInRange < InstNo && "Can't push an empty range here.");
|
||||
GuaranteedExecutionIntervals.emplace_back(FirstInstInRange, InstNo);
|
||||
}
|
||||
InRange = false;
|
||||
}
|
||||
|
||||
if (isInterestingInstruction(&BBI)) {
|
||||
auto It = InstNumbers.find(&BBI);
|
||||
assert(It != InstNumbers.end() &&
|
||||
InstNo <= It->second &&
|
||||
"missing number for interesting instruction");
|
||||
InstNo = It->second + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (InRange) {
|
||||
assert(FirstInstInRange < InstNo && "Can't push an empty range here.");
|
||||
GuaranteedExecutionIntervals.emplace_back(FirstInstInRange, InstNo);
|
||||
}
|
||||
|
||||
return GuaranteedExecutionIntervals;
|
||||
}
|
||||
|
||||
/// Return true if, when CxtI executes, it is guaranteed that either
|
||||
/// I had executed already or that I is guaranteed to be later executed.
|
||||
///
|
||||
/// The useful property this guarantees is that if I exhibits undefined
|
||||
/// behavior under some circumstances, then the whole program will exhibit
|
||||
/// undefined behavior at CxtI.
|
||||
bool isGuaranteedToBeExecuted(const Instruction *CxtI, const Instruction *I) {
|
||||
const BasicBlock *BB = CxtI->getParent();
|
||||
|
||||
if (BB != I->getParent()) {
|
||||
// Instructions in different basic blocks, so control flow
|
||||
// can diverge between them (we could track this with
|
||||
// postdoms, but we don't bother).
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned Index1 = getInstructionIndex(CxtI);
|
||||
unsigned Index2 = getInstructionIndex(I);
|
||||
|
||||
auto& BBGEI = GuaranteedExecutionIntervals[BB];
|
||||
if (!BBGEI.hasValue()) {
|
||||
BBGEI.emplace(computeGEI(BB));
|
||||
}
|
||||
|
||||
// We want to check whether I and CxtI are in the same range. To do that,
|
||||
// we notice that CxtI can only be in the first range R where
|
||||
// CxtI.end < R.end. If we find that range using binary search,
|
||||
// we can check whether I and CxtI are both in it.
|
||||
GuaranteedExecutionRange Bound(Index1, Index1);
|
||||
auto R = std::upper_bound(
|
||||
BBGEI->begin(), BBGEI->end(), Bound,
|
||||
[](GuaranteedExecutionRange I_, GuaranteedExecutionRange R) {
|
||||
return I_.End < R.End;
|
||||
});
|
||||
|
||||
return R != BBGEI->end() &&
|
||||
R->Start <= Index1 && Index1 < R->End &&
|
||||
R->Start <= Index2 && Index2 < R->End;
|
||||
}
|
||||
|
||||
/// Get or calculate the index of the specified instruction.
|
||||
unsigned getInstructionIndex(const Instruction *I) {
|
||||
assert(isInterestingInstruction(I) &&
|
||||
@ -213,9 +319,11 @@ public:
|
||||
// avoid gratuitus rescans.
|
||||
const BasicBlock *BB = I->getParent();
|
||||
unsigned InstNo = 0;
|
||||
GuaranteedExecutionIntervals.erase(BB);
|
||||
for (const Instruction &BBI : *BB)
|
||||
if (isInterestingInstruction(&BBI))
|
||||
InstNumbers[&BBI] = InstNo++;
|
||||
|
||||
It = InstNumbers.find(I);
|
||||
|
||||
assert(It != InstNumbers.end() && "Didn't insert instruction?");
|
||||
@ -224,7 +332,10 @@ public:
|
||||
|
||||
void deleteValue(const Instruction *I) { InstNumbers.erase(I); }
|
||||
|
||||
void clear() { InstNumbers.clear(); }
|
||||
void clear() {
|
||||
InstNumbers.clear();
|
||||
GuaranteedExecutionIntervals.clear();
|
||||
}
|
||||
};
|
||||
|
||||
struct PromoteMem2Reg {
|
||||
@ -303,6 +414,7 @@ private:
|
||||
SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
|
||||
void RenamePass(BasicBlock *BB, BasicBlock *Pred,
|
||||
RenamePassData::ValVector &IncVals,
|
||||
LargeBlockInfo &LBI,
|
||||
std::vector<RenamePassData> &Worklist);
|
||||
bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
|
||||
};
|
||||
@ -321,6 +433,32 @@ static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
|
||||
AC->registerAssumption(CI);
|
||||
}
|
||||
|
||||
static void addAssumptionsFromMetadata(LoadInst *LI,
|
||||
Value *ReplVal,
|
||||
DominatorTree &DT,
|
||||
const DataLayout &DL,
|
||||
LargeBlockInfo &LBI,
|
||||
AssumptionCache *AC)
|
||||
{
|
||||
if (LI->getMetadata(LLVMContext::MD_nonnull) &&
|
||||
!isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) {
|
||||
addAssumeNonNull(AC, LI);
|
||||
}
|
||||
|
||||
if (auto *N = LI->getMetadata(LLVMContext::MD_range)) {
|
||||
// Range metadata is harder to use as an assumption,
|
||||
// so don't try to add one, but *do* try to copy
|
||||
// the metadata to a load in the same BB.
|
||||
if (LoadInst *NewLI = dyn_cast<LoadInst>(ReplVal)) {
|
||||
DEBUG(dbgs() << "trying to move !range metadata from" <<
|
||||
*LI << " to" << *NewLI << "\n");
|
||||
if (LBI.isGuaranteedToBeExecuted(LI, NewLI)) {
|
||||
copyRangeMetadata(DL, *LI, N, *NewLI);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
|
||||
// Knowing that this alloca is promotable, we know that it's safe to kill all
|
||||
// instructions except for load and store.
|
||||
@ -409,9 +547,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
|
||||
// If the load was marked as nonnull we don't want to lose
|
||||
// that information when we erase this Load. So we preserve
|
||||
// it with an assume.
|
||||
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
|
||||
!isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
|
||||
addAssumeNonNull(AC, LI);
|
||||
addAssumptionsFromMetadata(LI, ReplVal, DT, DL, LBI, AC);
|
||||
|
||||
LI->replaceAllUsesWith(ReplVal);
|
||||
LI->eraseFromParent();
|
||||
@ -505,9 +641,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
|
||||
// Note, if the load was marked as nonnull we don't want to lose that
|
||||
// information when we erase it. So we preserve it with an assume.
|
||||
Value *ReplVal = std::prev(I)->second->getOperand(0);
|
||||
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
|
||||
!isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
|
||||
addAssumeNonNull(AC, LI);
|
||||
addAssumptionsFromMetadata(LI, ReplVal, DT, DL, LBI, AC);
|
||||
|
||||
LI->replaceAllUsesWith(ReplVal);
|
||||
}
|
||||
@ -643,7 +777,6 @@ void PromoteMem2Reg::run() {
|
||||
|
||||
if (Allocas.empty())
|
||||
return; // All of the allocas must have been trivial!
|
||||
|
||||
LBI.clear();
|
||||
|
||||
// Set the incoming values for the basic block to be null values for all of
|
||||
@ -661,9 +794,10 @@ void PromoteMem2Reg::run() {
|
||||
RenamePassData RPD = std::move(RenamePassWorkList.back());
|
||||
RenamePassWorkList.pop_back();
|
||||
// RenamePass may add new worklist entries.
|
||||
RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
|
||||
RenamePass(RPD.BB, RPD.Pred, RPD.Values, LBI, RenamePassWorkList);
|
||||
} while (!RenamePassWorkList.empty());
|
||||
|
||||
LBI.clear();
|
||||
// The renamer uses the Visited set to avoid infinite loops. Clear it now.
|
||||
Visited.clear();
|
||||
|
||||
@ -875,6 +1009,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
|
||||
/// predecessor block Pred.
|
||||
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
|
||||
RenamePassData::ValVector &IncomingVals,
|
||||
LargeBlockInfo &LBI,
|
||||
std::vector<RenamePassData> &Worklist) {
|
||||
NextIteration:
|
||||
// If we are inserting any phi nodes into this BB, they will already be in the
|
||||
@ -941,13 +1076,12 @@ NextIteration:
|
||||
// If the load was marked as nonnull we don't want to lose
|
||||
// that information when we erase this Load. So we preserve
|
||||
// it with an assume.
|
||||
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
|
||||
!isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT))
|
||||
addAssumeNonNull(AC, LI);
|
||||
addAssumptionsFromMetadata(LI, V, DT, SQ.DL, LBI, AC);
|
||||
|
||||
// Anything using the load now uses the current value.
|
||||
LI->replaceAllUsesWith(V);
|
||||
BB->getInstList().erase(LI);
|
||||
LBI.deleteValue(LI);
|
||||
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
|
||||
// Delete this instruction and mark the name as the current holder of the
|
||||
// value
|
||||
@ -965,6 +1099,7 @@ NextIteration:
|
||||
for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second])
|
||||
ConvertDebugDeclareToDebugValue(DII, SI, DIB);
|
||||
BB->getInstList().erase(SI);
|
||||
LBI.deleteValue(SI);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,8 @@
|
||||
; Make sure that SROA doesn't lose nonnull metadata
|
||||
; on loads from allocas that get optimized out.
|
||||
|
||||
%pair = type { i64, [0 x i64], [1 x i64] }
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
|
||||
|
||||
; Check that we do basic propagation of nonnull when rewriting.
|
||||
@ -42,6 +44,23 @@ entry:
|
||||
ret float* %ret
|
||||
}
|
||||
|
||||
; Make sure we propagate the !range attribute when we expand loads.
|
||||
define i64 @propagate_range(%pair* dereferenceable(16)) {
|
||||
; CHECK-LABEL: define i64 @propagate_range(
|
||||
; CHECK-NEXT: start:
|
||||
; CHECK-NEXT: %[[SROA_IDX:.*]] = getelementptr inbounds %pair
|
||||
; CHECK-NEXT: %[[RESULT:.*]] = load i64, i64* %[[SROA_IDX]], align 8, !range !1
|
||||
; CHECK: ret i64 %[[RESULT]]
|
||||
start:
|
||||
%a = alloca %pair
|
||||
%1 = bitcast %pair* %0 to i8*
|
||||
%2 = bitcast %pair* %a to i8*
|
||||
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %1, i64 16, i32 8, i1 false)
|
||||
%3 = getelementptr inbounds %pair, %pair* %a, i32 0, i32 0
|
||||
%4 = load i64, i64* %3, !range !1
|
||||
ret i64 %4
|
||||
}
|
||||
|
||||
; Make sure we properly handle the !nonnull attribute when we convert
|
||||
; a pointer load to an integer load.
|
||||
; FIXME: While this doesn't do anythnig actively harmful today, it really
|
||||
@ -90,3 +109,4 @@ entry:
|
||||
}
|
||||
|
||||
!0 = !{}
|
||||
!1 = !{i64 0, i64 2}
|
||||
|
Loading…
x
Reference in New Issue
Block a user