mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-19 04:29:45 +00:00
[LV] Don't attempt to type-shrink scalarized instructions
After r288909, instructions feeding predicated instructions may be scalarized if profitable. Since these instructions will remain scalar, we shouldn't attempt to type-shrink them. We should only truncate vector types to their minimal bit widths. This bug was exposed by enabling the vectorization of loops containing conditional stores by default.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289958 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7e4360079c
commit
48a64092ba
@ -1917,6 +1917,13 @@ public:
|
||||
return Scalars->second.count(I);
|
||||
}
|
||||
|
||||
/// \returns True if instruction \p I can be truncated to a smaller bitwidth
|
||||
/// for vectorization factor \p VF.
///
/// All of the following must hold for the result to be true:
///  - \p VF is greater than one,
///  - \p I has an entry in MinBWs,
///  - isProfitableToScalarize(I, VF) is false, and
///  - Legal->isScalarAfterVectorization(I) is false.
///
/// Instructions that remain scalar must keep their original type; only
/// vector types are shrunk to their minimal bit widths.
|
||||
bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
|
||||
// The conjunction short-circuits left to right, so the two scalarization
// queries are only evaluated when VF > 1 and I is present in MinBWs.
return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
|
||||
!Legal->isScalarAfterVectorization(I);
|
||||
}
|
||||
|
||||
private:
|
||||
/// The vectorization cost is a combination of the cost itself and a boolean
|
||||
/// indicating whether any of the contributing operations will actually
|
||||
@ -3725,6 +3732,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
|
||||
//
|
||||
SmallPtrSet<Value *, 4> Erased;
|
||||
for (const auto &KV : Cost->getMinimalBitwidths()) {
|
||||
// If the value wasn't vectorized, we must maintain the original scalar
|
||||
// type. The absence of the value from VectorLoopValueMap indicates that it
|
||||
// wasn't vectorized.
|
||||
if (!VectorLoopValueMap.hasVector(KV.first))
|
||||
continue;
|
||||
VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
|
||||
for (Value *&I : Parts) {
|
||||
if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
|
||||
@ -3817,6 +3829,11 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
|
||||
|
||||
// We'll have created a bunch of ZExts that are now parentless. Clean up.
|
||||
for (const auto &KV : Cost->getMinimalBitwidths()) {
|
||||
// If the value wasn't vectorized, we must maintain the original scalar
|
||||
// type. The absence of the value from VectorLoopValueMap indicates that it
|
||||
// wasn't vectorized.
|
||||
if (!VectorLoopValueMap.hasVector(KV.first))
|
||||
continue;
|
||||
VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
|
||||
for (Value *&I : Parts) {
|
||||
ZExtInst *Inst = dyn_cast<ZExtInst>(I);
|
||||
@ -6837,7 +6854,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
unsigned VF,
|
||||
Type *&VectorTy) {
|
||||
Type *RetTy = I->getType();
|
||||
if (VF > 1 && MinBWs.count(I))
|
||||
if (canTruncateToMinimalBitwidth(I, VF))
|
||||
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
|
||||
VectorTy = ToVectorTy(RetTy, VF);
|
||||
auto SE = PSE.getSE();
|
||||
@ -6958,9 +6975,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
case Instruction::FCmp: {
|
||||
Type *ValTy = I->getOperand(0)->getType();
|
||||
Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
|
||||
auto It = MinBWs.find(Op0AsInstruction);
|
||||
if (VF > 1 && It != MinBWs.end())
|
||||
ValTy = IntegerType::get(ValTy->getContext(), It->second);
|
||||
if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
|
||||
ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
|
||||
VectorTy = ToVectorTy(ValTy, VF);
|
||||
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
|
||||
}
|
||||
@ -7108,7 +7124,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
|
||||
|
||||
Type *SrcScalarTy = I->getOperand(0)->getType();
|
||||
Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
|
||||
if (VF > 1 && MinBWs.count(I)) {
|
||||
if (canTruncateToMinimalBitwidth(I, VF)) {
|
||||
// This cast is going to be shrunk. This may remove the cast or it might
|
||||
// turn it into slightly different cast. For example, if MinBW == 16,
|
||||
// "zext i8 %1 to i32" becomes "zext i8 %1 to i16".
|
||||
|
@ -131,3 +131,56 @@ for.inc26:
|
||||
%iNewChunks.1.lcssa = phi i32 [ undef, %for.body9 ], [ %iNewChunks.2, %for.inc23 ]
|
||||
unreachable
|
||||
}
|
||||
|
||||
; VEC-LABEL: @minimal_bit_widths(
|
||||
;
|
||||
; In the test below, it's more profitable for the expression feeding the
|
||||
; conditional store to remain scalar. Since we can only type-shrink vector
|
||||
; types, we shouldn't try to represent the expression in a smaller type.
|
||||
;
|
||||
; VEC: vector.body:
|
||||
; VEC: %wide.load = load <2 x i8>, <2 x i8>* {{.*}}, align 1
|
||||
; VEC: br i1 {{.*}}, label %[[IF0:.+]], label %[[CONT0:.+]]
|
||||
; VEC: [[IF0]]:
|
||||
; VEC: %[[E0:.+]] = extractelement <2 x i8> %wide.load, i32 0
|
||||
; VEC: %[[Z0:.+]] = zext i8 %[[E0]] to i32
|
||||
; VEC: %[[T0:.+]] = trunc i32 %[[Z0]] to i8
|
||||
; VEC: store i8 %[[T0]], i8* {{.*}}, align 1
|
||||
; VEC: br label %[[CONT0]]
|
||||
; VEC: [[CONT0]]:
|
||||
; VEC: br i1 {{.*}}, label %[[IF1:.+]], label %[[CONT1:.+]]
|
||||
; VEC: [[IF1]]:
|
||||
; VEC: %[[E1:.+]] = extractelement <2 x i8> %wide.load, i32 1
|
||||
; VEC: %[[Z1:.+]] = zext i8 %[[E1]] to i32
|
||||
; VEC: %[[T1:.+]] = trunc i32 %[[Z1]] to i8
|
||||
; VEC: store i8 %[[T1]], i8* {{.*}}, align 1
|
||||
; VEC: br label %[[CONT1]]
|
||||
; VEC: [[CONT1]]:
|
||||
; VEC: br i1 {{.*}}, label %middle.block, label %vector.body
|
||||
;
|
||||
; Loop containing a store that is guarded by the loop-invariant condition %c.
; The stored value is the loaded i8 widened to i32 and truncated straight back
; to i8, so the zext/trunc pair only feeds the conditional store.
define void @minimal_bit_widths(i1 %c) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
; %tmp0 counts up from 0 and indexes the i8 pointer; %tmp1 counts down and
; drives the exit test in for.inc.
%tmp0 = phi i64 [ %tmp6, %for.inc ], [ 0, %entry ]
|
||||
%tmp1 = phi i64 [ %tmp7, %for.inc ], [ undef, %entry ]
|
||||
%tmp2 = getelementptr i8, i8* undef, i64 %tmp0
|
||||
%tmp3 = load i8, i8* %tmp2, align 1
|
||||
br i1 %c, label %if.then, label %for.inc
|
||||
|
||||
if.then:
|
||||
; Predicated block: %tmp4 is used only by %tmp5, and %tmp5 only by the store.
%tmp4 = zext i8 %tmp3 to i32
|
||||
%tmp5 = trunc i32 %tmp4 to i8
|
||||
store i8 %tmp5, i8* %tmp2, align 1
|
||||
br label %for.inc
|
||||
|
||||
for.inc:
|
||||
; Advance the index, decrement the trip counter, and exit once it reaches 0.
%tmp6 = add nuw nsw i64 %tmp0, 1
|
||||
%tmp7 = add i64 %tmp1, -1
|
||||
%tmp8 = icmp eq i64 %tmp7, 0
|
||||
br i1 %tmp8, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user