mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-26 14:25:18 +00:00
Loop Vectorizer: Update the cost model of scatter/gather operations and make
them more expensive.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170995 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c4265e1d68
commit
d54fed2786
@ -69,8 +69,6 @@ public:
|
||||
|
||||
virtual ~VectorTargetTransformImpl() {}
|
||||
|
||||
virtual unsigned getInstrCost(unsigned Opcode, Type *Ty1, Type *Ty2) const;
|
||||
|
||||
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
|
||||
|
||||
virtual unsigned getBroadcastCost(Type *Tp) const;
|
||||
|
@ -135,44 +135,28 @@ public:
|
||||
virtual bool shouldBuildLookupTables() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// getPopcntHwSupport - Return hardware support for population count.
|
||||
virtual PopcntHwSupport getPopcntHwSupport(unsigned IntTyWidthInBit) const {
|
||||
return None;
|
||||
}
|
||||
|
||||
/// getIntImmCost - Return the expected cost of materializing the given
|
||||
/// integer immediate of the specified type.
|
||||
virtual unsigned getIntImmCost(const APInt&, Type*) const {
|
||||
// Default assumption is immediate is cheap.
|
||||
// The default assumption is that the immediate is cheap.
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
/// VectorTargetTransformInfo - This interface is used by the vectorizers
|
||||
/// to estimate the profitability of vectorization for different instructions.
|
||||
/// This interface provides the cost of different IR instructions. The cost
|
||||
/// is unit-less and represents the estimated throughput of the instruction
|
||||
/// (not the latency!) assuming that all branches are predicted, cache is hit,
|
||||
/// etc.
|
||||
class VectorTargetTransformInfo {
|
||||
public:
|
||||
virtual ~VectorTargetTransformInfo() {}
|
||||
|
||||
/// Returns the expected cost of the instruction opcode. The opcode is one of
|
||||
/// the enums like Instruction::Add. The type arguments are the type of the
|
||||
/// operation.
|
||||
/// Most instructions only use the first type and in that case the second
|
||||
/// operand is ignored.
|
||||
///
|
||||
/// Exceptions:
|
||||
/// * Br instructions do not use any of the types.
|
||||
/// * Select instructions pass the return type as Ty1 and the selector as Ty2.
|
||||
/// * Cast instructions pass the destination as Ty1 and the source as Ty2.
|
||||
/// * Insert/Extract element pass only the vector type as Ty1.
|
||||
/// * ShuffleVector, Load, Store do not use this call.
|
||||
virtual unsigned getInstrCost(unsigned Opcode,
|
||||
Type *Ty1 = 0,
|
||||
Type *Ty2 = 0) const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// Returns the expected cost of arithmetic ops, such as mul, xor, fsub, etc.
|
||||
virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
|
||||
return 1;
|
||||
|
@ -132,7 +132,6 @@ int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const {
|
||||
|
||||
std::pair<unsigned, MVT>
|
||||
VectorTargetTransformImpl::getTypeLegalizationCost(Type *Ty) const {
|
||||
|
||||
LLVMContext &C = Ty->getContext();
|
||||
EVT MTy = TLI->getValueType(Ty);
|
||||
|
||||
@ -271,7 +270,7 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||
return getScalarizationOverhead(Dst, true, true) + Num * Cost;
|
||||
}
|
||||
|
||||
// We already handled vector-to-vector and scalar-to-scalar conversions. This
|
||||
// We already handled vector-to-vector and scalar-to-scalar conversions. This
|
||||
// is where we handle bitcast between vectors and scalars. We need to assume
|
||||
// that the conversion is scalarized in one way or another.
|
||||
if (Opcode == Instruction::BitCast)
|
||||
@ -283,6 +282,7 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
|
||||
}
|
||||
|
||||
unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
|
||||
// Branches are assumed to be predicted.
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -330,12 +330,6 @@ unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
|
||||
return 1;
|
||||
}
|
||||
|
||||
unsigned
|
||||
VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
|
||||
Type *Ty2) const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
unsigned
|
||||
VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
unsigned Alignment,
|
||||
|
@ -17988,7 +17988,6 @@ X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
|
||||
return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
|
||||
}
|
||||
|
||||
|
||||
unsigned
|
||||
X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
|
||||
unsigned Alignment,
|
||||
|
@ -2080,17 +2080,23 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||
VectorTy = ToVectorTy(ValTy, VF);
|
||||
|
||||
if (VF == 1)
|
||||
return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
|
||||
return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy,
|
||||
SI->getAlignment(),
|
||||
SI->getPointerAddressSpace());
|
||||
|
||||
// Scalarized stores.
|
||||
if (!Legal->isConsecutivePtr(SI->getPointerOperand())) {
|
||||
unsigned Cost = 0;
|
||||
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
|
||||
ValTy);
|
||||
// The cost of extracting from the value vector.
|
||||
Cost += VF * (ExtCost);
|
||||
|
||||
// The cost of extracting from the value vector and pointer vector.
|
||||
Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
|
||||
for (unsigned i = 0; i < VF; ++i) {
|
||||
Cost += VTTI->getVectorInstrCost(Instruction::ExtractElement,
|
||||
VectorTy, i);
|
||||
Cost += VTTI->getVectorInstrCost(Instruction::ExtractElement,
|
||||
PtrTy, i);
|
||||
}
|
||||
|
||||
// The cost of the scalar stores.
|
||||
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
|
||||
ValTy->getScalarType(),
|
||||
@ -2107,16 +2113,25 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||
LoadInst *LI = cast<LoadInst>(I);
|
||||
|
||||
if (VF == 1)
|
||||
return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
|
||||
return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy,
|
||||
LI->getAlignment(),
|
||||
LI->getPointerAddressSpace());
|
||||
|
||||
// Scalarized loads.
|
||||
if (!Legal->isConsecutivePtr(LI->getPointerOperand())) {
|
||||
unsigned Cost = 0;
|
||||
unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
|
||||
// The cost of inserting the loaded value into the result vector.
|
||||
Cost += VF * (InCost);
|
||||
Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
|
||||
|
||||
// The cost of extracting from the pointer vector.
|
||||
for (unsigned i = 0; i < VF; ++i)
|
||||
Cost += VTTI->getVectorInstrCost(Instruction::ExtractElement,
|
||||
PtrTy, i);
|
||||
|
||||
// The cost of inserting data to the result vector.
|
||||
for (unsigned i = 0; i < VF; ++i)
|
||||
Cost += VTTI->getVectorInstrCost(Instruction::InsertElement,
|
||||
VectorTy, i);
|
||||
|
||||
// The cost of the scalar loads.
|
||||
Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
|
||||
RetTy->getScalarType(),
|
||||
@ -2169,18 +2184,19 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
|
||||
bool IsVoid = RetTy->isVoidTy();
|
||||
|
||||
unsigned InsCost = (IsVoid ? 0 :
|
||||
VTTI->getInstrCost(Instruction::InsertElement,
|
||||
VTTI->getVectorInstrCost(Instruction::InsertElement,
|
||||
VectorTy));
|
||||
|
||||
unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
|
||||
unsigned ExtCost = VTTI->getVectorInstrCost(Instruction::ExtractElement,
|
||||
VectorTy);
|
||||
|
||||
// The cost of inserting the results plus extracting each one of the
|
||||
// operands.
|
||||
Cost += VF * (InsCost + ExtCost * I->getNumOperands());
|
||||
|
||||
// The cost of executing VF copies of the scalar instruction.
|
||||
Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
|
||||
// The cost of executing VF copies of the scalar instruction. This opcode
|
||||
// is unknown. Assume that it is the same as 'mul'.
|
||||
Cost += VF * VTTI->getArithmeticInstrCost(Instruction::Mul, VectorTy);
|
||||
return Cost;
|
||||
}
|
||||
}// end of switch.
|
||||
|
@ -8,8 +8,11 @@ target triple = "x86_64-apple-macosx10.8.0"
|
||||
@d = common global [2048 x i32] zeroinitializer, align 16
|
||||
@a = common global [2048 x i32] zeroinitializer, align 16
|
||||
|
||||
; The program below gathers and scatters data. We had better not vectorize it.
|
||||
;CHECK: cost_model_1
|
||||
;CHECK: <4 x i32>
|
||||
;CHECK-NOT: <2 x i32>
|
||||
;CHECK-NOT: <4 x i32>
|
||||
;CHECK-NOT: <8 x i32>
|
||||
;CHECK: ret void
|
||||
define void @cost_model_1() nounwind uwtable noinline ssp {
|
||||
entry:
|
||||
|
Loading…
x
Reference in New Issue
Block a user