mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-04-12 20:48:17 +00:00
[LV] Move getScalarizationOverhead and vector call cost computations to CM. (NFC)
This reduces the number of parameters we need to pass in and they seem a natural fit in LoopVectorizationCostModel. Also simplifies things for D59995. As a follow-up refactoring, we could expose only a shouldUseVectorIntrinsic() helper in LoopVectorizationCostModel, instead of calling getVectorCallCost/getVectorIntrinsicCost in InnerLoopVectorizer/VPRecipeBuilder. Reviewers: Ayal, hsaito, dcaballe, rengolin Reviewed By: rengolin Differential Revision: https://reviews.llvm.org/D61638 llvm-svn: 360758
This commit is contained in:
parent
5a89ac01ce
commit
fa1f5bcc06
@ -1169,6 +1169,18 @@ public:
|
||||
return foldTailByMasking() || Legal->blockNeedsPredication(BB);
|
||||
}
|
||||
|
||||
/// Estimate cost of an intrinsic call instruction CI if it were vectorized
|
||||
/// with factor VF. Return the cost of the instruction, including
|
||||
/// scalarization overhead if it's needed.
|
||||
unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF);
|
||||
|
||||
/// Estimate cost of a call instruction CI if it were vectorized with factor
|
||||
/// VF. Return the cost of the instruction, including scalarization overhead
|
||||
/// if it's needed. The flag NeedToScalarize shows if the call needs to be
|
||||
/// scalarized -
|
||||
/// i.e. either vector version isn't available, or is too expensive.
|
||||
unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize);
|
||||
|
||||
private:
|
||||
unsigned NumPredStores = 0;
|
||||
|
||||
@ -1221,6 +1233,10 @@ private:
|
||||
/// element)
|
||||
unsigned getUniformMemOpCost(Instruction *I, unsigned VF);
|
||||
|
||||
/// Estimate the overhead of scalarizing an instruction. This is a
|
||||
/// convenience wrapper for the type-based getScalarizationOverhead API.
|
||||
unsigned getScalarizationOverhead(Instruction *I, unsigned VF);
|
||||
|
||||
/// Returns whether the instruction is a load or store and will be emitted
|
||||
/// as a vector operation.
|
||||
bool isConsecutiveLoadOrStore(Instruction *I);
|
||||
@ -3057,45 +3073,9 @@ static void cse(BasicBlock *BB) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Estimate the overhead of scalarizing an instruction. This is a
|
||||
/// convenience wrapper for the type-based getScalarizationOverhead API.
/// \p I is the instruction being costed, \p VF the vectorization factor and
/// \p TTI the target cost interface that is queried. Returns 0 when \p VF is
/// 1; otherwise the sum of the result-side and operand-side overheads
/// reported by \p TTI.
|
||||
static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
|
||||
const TargetTransformInfo &TTI) {
|
||||
// With VF == 1 no overhead is charged.
if (VF == 1)
|
||||
return 0;
|
||||
|
||||
unsigned Cost = 0;
|
||||
Type *RetTy = ToVectorTy(I->getType(), VF);
|
||||
// Result side: charged for every non-void instruction, except loads on
// targets that report efficient vector element load/store support.
// NOTE(review): the two boolean arguments presumably select insert overhead
// without extract overhead -- confirm against the TTI API.
if (!RetTy->isVoidTy() &&
|
||||
(!isa<LoadInst>(I) ||
|
||||
!TTI.supportsEfficientVectorElementLoadStore()))
|
||||
Cost += TTI.getScalarizationOverhead(RetTy, true, false);
|
||||
|
||||
// Some targets keep addresses scalar.
// For loads on such targets no operand overhead is added.
|
||||
if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
|
||||
return Cost;
|
||||
|
||||
// Operand side: calls contribute only their argument operands.
if (CallInst *CI = dyn_cast<CallInst>(I)) {
|
||||
SmallVector<const Value *, 4> Operands(CI->arg_operands());
|
||||
Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
|
||||
}
|
||||
// All other instructions contribute every operand, except stores on targets
// that report efficient vector element load/store support.
else if (!isa<StoreInst>(I) ||
|
||||
!TTI.supportsEfficientVectorElementLoadStore()) {
|
||||
SmallVector<const Value *, 4> Operands(I->operand_values());
|
||||
Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
|
||||
}
|
||||
|
||||
return Cost;
|
||||
}
|
||||
|
||||
// Estimate cost of a call instruction CI if it were vectorized with factor VF.
|
||||
// Return the cost of the instruction, including scalarization overhead if it's
|
||||
// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
|
||||
// i.e. either vector version isn't available, or is too expensive.
|
||||
static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
|
||||
const TargetTransformInfo &TTI,
|
||||
const TargetLibraryInfo *TLI,
|
||||
bool &NeedToScalarize) {
|
||||
unsigned LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
|
||||
unsigned VF,
|
||||
bool &NeedToScalarize) {
|
||||
Function *F = CI->getCalledFunction();
|
||||
StringRef FnName = CI->getCalledFunction()->getName();
|
||||
Type *ScalarRetTy = CI->getType();
|
||||
@ -3118,7 +3098,7 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
|
||||
|
||||
// Compute costs of unpacking argument values for the scalar calls and
|
||||
// packing the return values to a vector.
|
||||
unsigned ScalarizationCost = getScalarizationOverhead(CI, VF, TTI);
|
||||
unsigned ScalarizationCost = getScalarizationOverhead(CI, VF);
|
||||
|
||||
unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
|
||||
|
||||
@ -3137,12 +3117,8 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
|
||||
return Cost;
|
||||
}
|
||||
|
||||
// Estimate cost of an intrinsic call instruction CI if it were vectorized with
|
||||
// factor VF. Return the cost of the instruction, including scalarization
|
||||
// overhead if it's needed.
|
||||
static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF,
|
||||
const TargetTransformInfo &TTI,
|
||||
const TargetLibraryInfo *TLI) {
|
||||
unsigned LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
|
||||
unsigned VF) {
|
||||
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
|
||||
assert(ID && "Expected intrinsic call!");
|
||||
|
||||
@ -4126,9 +4102,9 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
|
||||
// version of the instruction.
|
||||
// Is it beneficial to perform intrinsic call compared to lib call?
|
||||
bool NeedToScalarize;
|
||||
unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
|
||||
unsigned CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize);
|
||||
bool UseVectorIntrinsic =
|
||||
ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
|
||||
ID && Cost->getVectorIntrinsicCost(CI, VF) <= CallCost;
|
||||
assert((UseVectorIntrinsic || !NeedToScalarize) &&
|
||||
"Instruction should be scalarized elsewhere.");
|
||||
|
||||
@ -5522,7 +5498,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
|
||||
|
||||
// Get the overhead of the extractelement and insertelement instructions
|
||||
// we might create due to scalarization.
|
||||
Cost += getScalarizationOverhead(I, VF, TTI);
|
||||
Cost += getScalarizationOverhead(I, VF);
|
||||
|
||||
// If we have a predicated store, it may not be executed for each vector
|
||||
// lane. Scale the cost by the probability of executing the predicated
|
||||
@ -5674,6 +5650,34 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||
return VectorizationCostTy(C, TypeNotScalarized);
|
||||
}
|
||||
|
||||
/// Estimate the overhead of scalarizing instruction \p I for vectorization
/// factor \p VF, using the cost model's TTI member. Returns 0 when \p VF is
/// 1; otherwise the sum of the result-side and operand-side overheads
/// reported by TTI.
unsigned LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
|
||||
unsigned VF) {
|
||||
|
||||
// With VF == 1 no overhead is charged.
if (VF == 1)
|
||||
return 0;
|
||||
|
||||
unsigned Cost = 0;
|
||||
Type *RetTy = ToVectorTy(I->getType(), VF);
|
||||
// Result side: charged for every non-void instruction, except loads on
// targets that report efficient vector element load/store support.
// NOTE(review): the two boolean arguments presumably select insert overhead
// without extract overhead -- confirm against the TTI API.
if (!RetTy->isVoidTy() &&
|
||||
(!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
|
||||
Cost += TTI.getScalarizationOverhead(RetTy, true, false);
|
||||
|
||||
// Some targets keep addresses scalar.
// For loads on such targets no operand overhead is added.
|
||||
if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
|
||||
return Cost;
|
||||
|
||||
// Operand side: calls contribute only their argument operands; all other
// instructions contribute every operand, except stores on targets that
// report efficient vector element load/store support.
if (CallInst *CI = dyn_cast<CallInst>(I)) {
|
||||
SmallVector<const Value *, 4> Operands(CI->arg_operands());
|
||||
Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
|
||||
} else if (!isa<StoreInst>(I) ||
|
||||
!TTI.supportsEfficientVectorElementLoadStore()) {
|
||||
SmallVector<const Value *, 4> Operands(I->operand_values());
|
||||
Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
|
||||
}
|
||||
|
||||
return Cost;
|
||||
}
|
||||
|
||||
void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
|
||||
if (VF == 1)
|
||||
return;
|
||||
@ -5914,7 +5918,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
|
||||
// The cost of insertelement and extractelement instructions needed for
|
||||
// scalarization.
|
||||
Cost += getScalarizationOverhead(I, VF, TTI);
|
||||
Cost += getScalarizationOverhead(I, VF);
|
||||
|
||||
// Scale the cost by the probability of executing the predicated blocks.
|
||||
// This assumes the predicated block for each vector lane is equally
|
||||
@ -6035,16 +6039,16 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
case Instruction::Call: {
|
||||
bool NeedToScalarize;
|
||||
CallInst *CI = cast<CallInst>(I);
|
||||
unsigned CallCost = getVectorCallCost(CI, VF, TTI, TLI, NeedToScalarize);
|
||||
unsigned CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
|
||||
if (getVectorIntrinsicIDForCall(CI, TLI))
|
||||
return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
|
||||
return std::min(CallCost, getVectorIntrinsicCost(CI, VF));
|
||||
return CallCost;
|
||||
}
|
||||
default:
|
||||
// The cost of executing VF copies of the scalar instruction. This opcode
|
||||
// is unknown. Assume that it is the same as 'mul'.
|
||||
return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) +
|
||||
getScalarizationOverhead(I, VF, TTI);
|
||||
getScalarizationOverhead(I, VF);
|
||||
} // end of switch.
|
||||
}
|
||||
|
||||
@ -6638,9 +6642,9 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
|
||||
// version of the instruction.
|
||||
// Is it beneficial to perform intrinsic call compared to lib call?
|
||||
bool NeedToScalarize;
|
||||
unsigned CallCost = getVectorCallCost(CI, VF, *TTI, TLI, NeedToScalarize);
|
||||
unsigned CallCost = CM.getVectorCallCost(CI, VF, NeedToScalarize);
|
||||
bool UseVectorIntrinsic =
|
||||
ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost;
|
||||
ID && CM.getVectorIntrinsicCost(CI, VF) <= CallCost;
|
||||
return UseVectorIntrinsic || !NeedToScalarize;
|
||||
}
|
||||
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
|
||||
@ -6828,7 +6832,7 @@ LoopVectorizationPlanner::buildVPlanWithVPRecipes(
|
||||
VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
|
||||
auto Plan = llvm::make_unique<VPlan>(VPBB);
|
||||
|
||||
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, TTI, Legal, CM, Builder);
|
||||
VPRecipeBuilder RecipeBuilder(OrigLoop, TLI, Legal, CM, Builder);
|
||||
// Represent values that will have defs inside VPlan.
|
||||
for (Value *V : NeedDef)
|
||||
Plan->addVPValue(V);
|
||||
|
@ -29,9 +29,6 @@ class VPRecipeBuilder {
|
||||
/// Target Library Info.
|
||||
const TargetLibraryInfo *TLI;
|
||||
|
||||
/// Target Transform Info.
|
||||
const TargetTransformInfo *TTI;
|
||||
|
||||
/// The legality analysis.
|
||||
LoopVectorizationLegality *Legal;
|
||||
|
||||
@ -104,11 +101,9 @@ public:
|
||||
|
||||
public:
|
||||
VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI,
|
||||
const TargetTransformInfo *TTI,
|
||||
LoopVectorizationLegality *Legal,
|
||||
LoopVectorizationCostModel &CM, VPBuilder &Builder)
|
||||
: OrigLoop(OrigLoop), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
|
||||
Builder(Builder) {}
|
||||
: OrigLoop(OrigLoop), TLI(TLI), Legal(Legal), CM(CM), Builder(Builder) {}
|
||||
|
||||
/// Check if a recipe can be created for \p I within the given VF \p Range.
|
||||
/// If a recipe can be created, it adds it to \p VPBB.
|
||||
|
Loading…
x
Reference in New Issue
Block a user