mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-24 12:20:00 +00:00
Implement a basic cost model for vector and scalar instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166642 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d95666c226
commit
2652c50f74
@ -1958,6 +1958,7 @@ private:
|
||||
|
||||
ValueTypeActionImpl ValueTypeActions;
|
||||
|
||||
public:
|
||||
LegalizeKind
|
||||
getTypeConversion(LLVMContext &Context, EVT VT) const {
|
||||
// If this is a simple type, use the ComputeRegisterProp mechanism.
|
||||
@ -2074,6 +2075,7 @@ private:
|
||||
return LegalizeKind(TypeSplitVector, NVT);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::pair<EVT, const TargetRegisterClass*> > AvailableRegClasses;
|
||||
|
||||
/// TargetDAGCombineArray - Targets can specify ISD nodes that they would
|
||||
|
@ -16,6 +16,7 @@
|
||||
#define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
|
||||
|
||||
#include "llvm/TargetTransformInfo.h"
|
||||
#include "llvm/CodeGen/ValueTypes.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
@ -51,6 +52,10 @@ class VectorTargetTransformImpl : public VectorTargetTransformInfo {
|
||||
private:
|
||||
const TargetLowering *TLI;
|
||||
|
||||
/// Estimate the cost of type-legalization and the legalized type.
|
||||
std::pair<unsigned, EVT>
|
||||
getTypeLegalizationCost(LLVMContext &C, EVT Ty) const;
|
||||
|
||||
public:
|
||||
explicit VectorTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "llvm/Target/TargetTransformImpl.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
#include <utility>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -53,11 +54,131 @@ unsigned ScalarTargetTransformImpl::getJumpBufSize() const {
|
||||
// Calls used by the vectorizers.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
int InstructionOpcodeToISD(unsigned Opcode) {
|
||||
static const int OpToISDTbl[] = {
|
||||
/*Instruction::Ret */ 0, // Opcode numbering start at #1.
|
||||
/*Instruction::Br */ 0,
|
||||
/*Instruction::Switch */ 0,
|
||||
/*Instruction::IndirectBr */ 0,
|
||||
/*Instruction::Invoke */ 0,
|
||||
/*Instruction::Resume */ 0,
|
||||
/*Instruction::Unreachable */ 0,
|
||||
/*Instruction::Add */ ISD::ADD,
|
||||
/*Instruction::FAdd */ ISD::FADD,
|
||||
/*Instruction::Sub */ ISD::SUB,
|
||||
/*Instruction::FSub */ ISD::FSUB,
|
||||
/*Instruction::Mul */ ISD::MUL,
|
||||
/*Instruction::FMul */ ISD::FMUL,
|
||||
/*Instruction::UDiv */ ISD::UDIV,
|
||||
/*Instruction::SDiv */ ISD::UDIV,
|
||||
/*Instruction::FDiv */ ISD::FDIV,
|
||||
/*Instruction::URem */ ISD::UREM,
|
||||
/*Instruction::SRem */ ISD::SREM,
|
||||
/*Instruction::FRem */ ISD::FREM,
|
||||
/*Instruction::Shl */ ISD::SHL,
|
||||
/*Instruction::LShr */ ISD::SRL,
|
||||
/*Instruction::AShr */ ISD::SRA,
|
||||
/*Instruction::And */ ISD::AND,
|
||||
/*Instruction::Or */ ISD::OR,
|
||||
/*Instruction::Xor */ ISD::XOR,
|
||||
/*Instruction::Alloca */ 0,
|
||||
/*Instruction::Load */ ISD::LOAD,
|
||||
/*Instruction::Store */ ISD::STORE,
|
||||
/*Instruction::GetElementPtr */ 0,
|
||||
/*Instruction::Fence */ 0,
|
||||
/*Instruction::AtomicCmpXchg */ 0,
|
||||
/*Instruction::AtomicRMW */ 0,
|
||||
/*Instruction::Trunc */ ISD::TRUNCATE,
|
||||
/*Instruction::ZExt */ ISD::ZERO_EXTEND,
|
||||
/*Instruction::SExt */ ISD::SEXTLOAD,
|
||||
/*Instruction::FPToUI */ ISD::FP_TO_UINT,
|
||||
/*Instruction::FPToSI */ ISD::FP_TO_SINT,
|
||||
/*Instruction::UIToFP */ ISD::UINT_TO_FP,
|
||||
/*Instruction::SIToFP */ ISD::SINT_TO_FP,
|
||||
/*Instruction::FPTrunc */ ISD::FP_ROUND,
|
||||
/*Instruction::FPExt */ ISD::FP_EXTEND,
|
||||
/*Instruction::PtrToInt */ ISD::BITCAST,
|
||||
/*Instruction::IntToPtr */ ISD::BITCAST,
|
||||
/*Instruction::BitCast */ ISD::BITCAST,
|
||||
/*Instruction::ICmp */ ISD::SETCC,
|
||||
/*Instruction::FCmp */ ISD::SETCC,
|
||||
/*Instruction::PHI */ 0,
|
||||
/*Instruction::Call */ 0,
|
||||
/*Instruction::Select */ ISD::SELECT,
|
||||
/*Instruction::UserOp1 */ 0,
|
||||
/*Instruction::UserOp2 */ 0,
|
||||
/*Instruction::VAArg */ 0,
|
||||
/*Instruction::ExtractElement*/ ISD::EXTRACT_VECTOR_ELT,
|
||||
/*Instruction::InsertElement */ ISD::INSERT_VECTOR_ELT,
|
||||
/*Instruction::ShuffleVector */ ISD::VECTOR_SHUFFLE,
|
||||
/*Instruction::ExtractValue */ ISD::MERGE_VALUES,
|
||||
/*Instruction::InsertValue */ ISD::MERGE_VALUES,
|
||||
/*Instruction::LandingPad */ 0};
|
||||
|
||||
assert((Instruction::Ret == 1) && (Instruction::LandingPad == 58) &&
|
||||
"Instruction order had changed");
|
||||
|
||||
// Opcode numbering starts at #1 but the table starts at #0, so we subtract
|
||||
// one from the opcode number.
|
||||
return OpToISDTbl[Opcode - 1];
|
||||
}
|
||||
|
||||
std::pair<unsigned, EVT>
|
||||
VectorTargetTransformImpl::getTypeLegalizationCost(LLVMContext &C,
|
||||
EVT Ty) const {
|
||||
unsigned Cost = 1;
|
||||
// We keep legalizing the type until we find a legal kind. We assume that
|
||||
// the only operation that costs anything is the split. After splitting
|
||||
// we need to handle two types.
|
||||
while (true) {
|
||||
TargetLowering::LegalizeKind LK = TLI->getTypeConversion(C, Ty);
|
||||
|
||||
if (LK.first == TargetLowering::TypeLegal)
|
||||
return std::make_pair(Cost, LK.second);
|
||||
|
||||
if (LK.first == TargetLowering::TypeSplitVector)
|
||||
Cost *= 2;
|
||||
|
||||
// Keep legalizing the type.
|
||||
Ty = LK.second;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
|
||||
Type *Ty2) const {
|
||||
return 1;
|
||||
// Check if any of the operands are vector operands.
|
||||
int ISD = InstructionOpcodeToISD(Opcode);
|
||||
|
||||
// Selects on vectors are actually vector selects.
|
||||
if (ISD == ISD::SELECT) {
|
||||
assert(Ty2 && "Ty2 must hold the select type");
|
||||
if (Ty2->isVectorTy())
|
||||
ISD = ISD::VSELECT;
|
||||
}
|
||||
|
||||
// If we don't have any information about this instruction assume it costs 1.
|
||||
if (ISD == 0)
|
||||
return 1;
|
||||
|
||||
assert(Ty1 && "We need to have at least one type");
|
||||
|
||||
// From this stage we look at the legalized type.
|
||||
std::pair<unsigned, EVT> LT =
|
||||
getTypeLegalizationCost(Ty1->getContext(), TLI->getValueType(Ty1));
|
||||
|
||||
if (TLI->isOperationLegalOrCustom(ISD, LT.second)) {
|
||||
// The operation is legal. Assume it costs 1. Multiply
|
||||
// by the type-legalization overhead.
|
||||
return LT.first * 1;
|
||||
}
|
||||
|
||||
unsigned NumElem =
|
||||
(LT.second.isVector() ? LT.second.getVectorNumElements() : 1);
|
||||
|
||||
// We will probably scalarize this instruction. Assume that the cost is the
|
||||
// number of the vector elements.
|
||||
return LT.first * NumElem * 1;
|
||||
}
|
||||
|
||||
unsigned
|
||||
@ -69,5 +190,9 @@ unsigned
|
||||
VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
unsigned Alignment,
|
||||
unsigned AddressSpace) const {
|
||||
return 1;
|
||||
// From this stage we look at the legalized type.
|
||||
std::pair<unsigned, EVT> LT =
|
||||
getTypeLegalizationCost(Src->getContext(), TLI->getValueType(Src));
|
||||
// Assume that all loads of legal types cost 1.
|
||||
return LT.first;
|
||||
}
|
||||
|
@ -185,7 +185,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
|
||||
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
|
||||
MPM.add(createLoopDeletionPass()); // Delete dead loops
|
||||
|
||||
if (Vectorize) {
|
||||
if (Vectorize || true) {
|
||||
MPM.add(createLoopVectorizePass());
|
||||
MPM.add(createLICMPass());
|
||||
}
|
||||
|
@ -300,10 +300,10 @@ private:
|
||||
class LoopVectorizationCostModel {
|
||||
public:
|
||||
/// C'tor.
|
||||
LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl,
|
||||
LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
|
||||
LoopVectorizationLegality *Leg,
|
||||
const VectorTargetTransformInfo *Vtti):
|
||||
TheLoop(Lp), SE(Se), DL(Dl), Legal(Leg), VTTI(Vtti) { }
|
||||
TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
|
||||
|
||||
/// Returns the most profitable vectorization factor for the loop that is
|
||||
/// smaller or equal to the VF argument. This method checks every power
|
||||
@ -325,8 +325,7 @@ private:
|
||||
Loop *TheLoop;
|
||||
/// Scev analysis.
|
||||
ScalarEvolution *SE;
|
||||
/// DataLayout analysis.
|
||||
DataLayout *DL;
|
||||
|
||||
/// Vectorization legality.
|
||||
LoopVectorizationLegality *Legal;
|
||||
/// Vector target information.
|
||||
@ -372,7 +371,7 @@ struct LoopVectorize : public LoopPass {
|
||||
if (TTI)
|
||||
VTTI = TTI->getVectorTargetTransformInfo();
|
||||
// Use the cost model.
|
||||
LoopVectorizationCostModel CM(L, SE, DL, &LVL, VTTI);
|
||||
LoopVectorizationCostModel CM(L, SE, &LVL, VTTI);
|
||||
VF = CM.findBestVectorizationFactor();
|
||||
|
||||
if (VF == 1) {
|
||||
@ -1432,11 +1431,12 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
|
||||
// For each instruction in the old loop.
|
||||
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
|
||||
Instruction *Inst = it;
|
||||
Cost += getInstructionCost(Inst, VF);
|
||||
unsigned C = getInstructionCost(Inst, VF);
|
||||
Cost += C;
|
||||
DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF <<
|
||||
" For instruction: "<< *Inst << "\n");
|
||||
}
|
||||
|
||||
// Return the cost divided by VF, because we will be executing
|
||||
// less iterations of the vector form.
|
||||
return Cost;
|
||||
}
|
||||
|
||||
@ -1444,11 +1444,13 @@ unsigned
|
||||
LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||
assert(VTTI && "Invalid vector target transformation info");
|
||||
switch (I->getOpcode()) {
|
||||
case Instruction::GetElementPtr:
|
||||
return 0;
|
||||
case Instruction::Br: {
|
||||
return VTTI->getInstrCost(I->getOpcode());
|
||||
}
|
||||
case Instruction::PHI:
|
||||
// PHIs are handled the same as the binary instructions below.
|
||||
return 0;
|
||||
case Instruction::Add:
|
||||
case Instruction::FAdd:
|
||||
case Instruction::Sub:
|
||||
@ -1493,11 +1495,17 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||
// Scalarized stores.
|
||||
if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
|
||||
unsigned Cost = 0;
|
||||
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement, VTy);
|
||||
// The cost of extracting from the vector value.
|
||||
Cost += VF * ExtCost;
|
||||
if (VF != 1) {
|
||||
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
|
||||
VTy);
|
||||
// The cost of extracting from the value vector and pointer vector.
|
||||
Cost += VF * (ExtCost * 2);
|
||||
}
|
||||
// The cost of the scalar stores.
|
||||
Cost += VF * VTTI->getInstrCost(I->getOpcode(), VTy->getScalarType());
|
||||
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
|
||||
VTy->getScalarType(),
|
||||
SI->getAlignment(),
|
||||
SI->getPointerAddressSpace());
|
||||
return Cost;
|
||||
}
|
||||
|
||||
@ -1512,11 +1520,18 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||
// Scalarized loads.
|
||||
if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
|
||||
unsigned Cost = 0;
|
||||
unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
|
||||
// The cost of inserting the loaded value into the result vector.
|
||||
Cost += VF * InCost;
|
||||
if (VF != 1) {
|
||||
unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, VTy);
|
||||
unsigned ExCost = VTTI->getInstrCost(Instruction::ExtractValue, VTy);
|
||||
|
||||
// The cost of inserting the loaded value into the result vector, and
|
||||
// extracting from a vector of pointers.
|
||||
Cost += VF * (InCost + ExCost);
|
||||
}
|
||||
// The cost of the scalar stores.
|
||||
Cost += VF * VTTI->getInstrCost(I->getOpcode(), VTy->getScalarType());
|
||||
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(), VTy->getScalarType(),
|
||||
LI->getAlignment(),
|
||||
LI->getPointerAddressSpace());
|
||||
return Cost;
|
||||
}
|
||||
|
||||
|
@ -8,10 +8,8 @@ target triple = "x86_64-apple-macosx10.8.0"
|
||||
@d = common global [2048 x i32] zeroinitializer, align 16
|
||||
@a = common global [2048 x i32] zeroinitializer, align 16
|
||||
|
||||
; At this point the cost model is pretty bad and we are vectorizing the code below.
|
||||
; TODO: This code should not be vectorized on x86.
|
||||
;CHECK: cost_model_1
|
||||
;CHECK: <4 x i32>
|
||||
;CHECK-NOT: <4 x i32>
|
||||
;CHECK: ret void
|
||||
define void @cost_model_1() nounwind uwtable noinline ssp {
|
||||
entry:
|
||||
|
Loading…
Reference in New Issue
Block a user