[VPlan] Add VPWidenCastRecipe, split off from VPWidenRecipe (NFCI).

To generate cast instructions, the result type is needed. To allow
creating widened casts without underlying instruction, introduce a new
VPWidenCastRecipe that also holds the result type.

This functionality will be used in a follow-up patch to
implement truncateToMinimalBitwidths as VPlan-to-VPlan transform.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D149081
This commit is contained in:
Florian Hahn 2023-05-05 13:20:16 +01:00
parent 95bb95ebe4
commit e3afe0b89d
No known key found for this signature in database
GPG Key ID: 9E54DEA47A8F4434
8 changed files with 94 additions and 47 deletions

View File

@ -8509,33 +8509,21 @@ VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I,
case Instruction::Add:
case Instruction::And:
case Instruction::AShr:
case Instruction::BitCast:
case Instruction::FAdd:
case Instruction::FCmp:
case Instruction::FDiv:
case Instruction::FMul:
case Instruction::FNeg:
case Instruction::FPExt:
case Instruction::FPToSI:
case Instruction::FPToUI:
case Instruction::FPTrunc:
case Instruction::FRem:
case Instruction::FSub:
case Instruction::ICmp:
case Instruction::IntToPtr:
case Instruction::LShr:
case Instruction::Mul:
case Instruction::Or:
case Instruction::PtrToInt:
case Instruction::Select:
case Instruction::SExt:
case Instruction::Shl:
case Instruction::SIToFP:
case Instruction::Sub:
case Instruction::Trunc:
case Instruction::UIToFP:
case Instruction::Xor:
case Instruction::ZExt:
case Instruction::Freeze:
return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end()));
};
@ -8688,6 +8676,11 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
*SI, make_range(Operands.begin(), Operands.end())));
}
if (auto *CI = dyn_cast<CastInst>(Instr)) {
return toVPRecipeResult(
new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), CI));
}
return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
}

View File

@ -247,11 +247,19 @@ void VPTransformState::addNewMetadata(Instruction *To,
}
void VPTransformState::addMetadata(Instruction *To, Instruction *From) {
// No source instruction to transfer metadata from?
if (!From)
return;
propagateMetadata(To, From);
addNewMetadata(To, From);
}
void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
// No source instruction to transfer metadata from?
if (!From)
return;
for (Value *V : To) {
if (Instruction *I = dyn_cast<Instruction>(V))
addMetadata(I, From);

View File

@ -959,6 +959,44 @@ public:
#endif
};
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
/// Cast instruction opcode.
Instruction::CastOps Opcode;
/// Result type for the cast.
Type *ResultTy;
public:
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
CastInst *UI = nullptr)
: VPRecipeBase(VPDef::VPWidenCastSC, Op), VPValue(this, UI),
Opcode(Opcode), ResultTy(ResultTy) {
assert((!UI || UI->getOpcode() == Opcode) &&
"opcode of underlying cast doesn't match");
assert((!UI || UI->getType() == ResultTy) &&
"result type of underlying cast doesn't match");
}
~VPWidenCastRecipe() override = default;
VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
/// Produce widened copies of the cast.
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
Instruction::CastOps getOpcode() const { return Opcode; }
/// Returns the result type of the cast.
Type *getResultType() const { return ResultTy; }
};
/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
/// ID of the vector intrinsic to call when widening the call. If set the

View File

@ -57,6 +57,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPBlendSC:
case VPReductionSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@ -90,6 +91,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPBlendSC:
case VPReductionSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@ -128,6 +130,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPReductionSC:
case VPScalarIVStepsSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@ -684,34 +687,6 @@ void VPWidenRecipe::execute(VPTransformState &State) {
break;
}
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
auto *CI = cast<CastInst>(&I);
State.setDebugLocFromInst(CI);
/// Vectorize casts.
assert(State.VF.isVector() && "not widening");
Type *DestTy = VectorType::get(CI->getType(), State.VF);
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *A = State.get(getOperand(0), Part);
Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
State.set(this, Cast, Part);
State.addMetadata(Cast, &I);
}
break;
}
default:
// This instruction is not vectorized by simple widening.
LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
@ -729,6 +704,34 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
O << Cmp->getPredicate() << " ";
printOperands(O, SlotTracker);
}
#endif
void VPWidenCastRecipe::execute(VPTransformState &State) {
auto *I = cast_or_null<Instruction>(getUnderlyingValue());
if (I)
State.setDebugLocFromInst(I);
auto &Builder = State.Builder;
/// Vectorize casts.
assert(State.VF.isVector() && "Not vectorizing?");
Type *DestTy = VectorType::get(getResultType(), State.VF);
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *A = State.get(getOperand(0), Part);
Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
State.set(this, Cast, Part);
State.addMetadata(Cast, I);
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-CAST ";
printAsOperand(O, SlotTracker);
O << " = " << Instruction::getOpcodeName(Opcode) << " ";
printOperands(O, SlotTracker);
O << " to " << *getResultType();
}
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {

View File

@ -79,6 +79,10 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
NewRecipe =
new VPWidenSelectRecipe(*SI, Plan->mapToVPValues(SI->operands()));
} else if (auto *CI = dyn_cast<CastInst>(Inst)) {
NewRecipe = new VPWidenCastRecipe(
CI->getOpcode(), Plan->getVPValueOrAddLiveIn(CI->getOperand(0)),
CI->getType(), CI);
} else {
NewRecipe =
new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands()));

View File

@ -345,6 +345,7 @@ public:
VPScalarIVStepsSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
VPWidenGEPSC,
VPWidenMemoryInstructionSC,
VPWidenSC,

View File

@ -19,7 +19,7 @@ target triple = "arm64-apple-ios"
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%l> = load ir<%gep.src>
; CHECK-NEXT: WIDEN ir<%conv> = fpext ir<%l>
; CHECK-NEXT: WIDEN-CAST ir<%conv> = fpext ir<%l> to double
; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using library function: __simd_sin_v2f64)
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst>
@ -46,7 +46,7 @@ target triple = "arm64-apple-ios"
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr ir<%src>, vp<[[STEPS]]>
; CHECK-NEXT: WIDEN ir<%l> = load ir<%gep.src>
; CHECK-NEXT: WIDEN ir<%conv> = fpext ir<%l>
; CHECK-NEXT: WIDEN-CAST ir<%conv> = fpext ir<%l> to double
; CHECK-NEXT: WIDEN-CALL ir<%s> = call @llvm.sin.f64(ir<%conv>) (using vector intrinsic)
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>
; CHECK-NEXT: REPLICATE store ir<%s>, ir<%gep.dst>

View File

@ -42,7 +42,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
; CHECK-NEXT: WIDEN ir<%conv> = sext vp<[[PRED1]]>
; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED1]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0> ir<%conv>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
@ -112,7 +112,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
@ -182,7 +182,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next>
; CHECK-NEXT: EMIT vp<[[WIDEN_CAN:%.+]]> = WIDEN-CANONICAL-INDUCTION vp<[[CAN_IV]]>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDEN_CAN]]> vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next>
; CHECK-NEXT: Successor(s): pred.srem
; CHECK-EMPTY:
@ -275,7 +275,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
; CHECK-NEXT: Successor(s): loop.0
; CHECK-EMPTY:
; CHECK-NEXT: loop.0:
; CHECK-NEXT: WIDEN ir<%conv> = sext vp<[[PRED]]>
; CHECK-NEXT: WIDEN-CAST ir<%conv> = sext vp<[[PRED]]> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%0> ir<%conv>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY:
@ -357,7 +357,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias
; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1>
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
; CHECK-NEXT: WIDEN ir<%recur.next> = sext ir<%y>
; CHECK-NEXT: WIDEN-CAST ir<%recur.next> = sext ir<%y> to i32
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%recur> ir<%recur.next>
; CHECK-NEXT: Successor(s): pred.store
; CHECK-EMPTY: