mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-24 11:36:10 +00:00
[InstSimplify] fold identity shuffles (recursing if needed)
This patch simplifies the examples from D31509 and D31927 (PR30630) and catches the basic identity shuffle tests that Zvi recently added. I'm not sure if we have something like this in DAGCombiner, but we should? It's worth noting that "MaxRecurse / RecursionLimit" is only 3 on entry at the moment. We might want to bump that up if there are longer shuffle chains like this in the wild. For now, we're ignoring shuffles that have undef mask elements because it's not clear how those should be handled. Differential Revision: https://reviews.llvm.org/D31960 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300714 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
af30bb9796
commit
19e4f8f1d9
@ -4080,6 +4080,60 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
|
||||
RecursionLimit);
|
||||
}
|
||||
|
||||
/// For the given destination element of a shuffle, peek through shuffles to
|
||||
/// match a root vector source operand that contains that element in the same
|
||||
/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s).
|
||||
static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
|
||||
Constant *Mask, Value *RootVec, int RootElt,
|
||||
unsigned MaxRecurse) {
|
||||
if (!MaxRecurse--)
|
||||
return nullptr;
|
||||
|
||||
// Bail out if any mask value is undefined. That kind of shuffle may be
|
||||
// simplified further based on demanded bits or other folds.
|
||||
int MaskVal = ShuffleVectorInst::getMaskValue(Mask, RootElt);
|
||||
if (MaskVal == -1)
|
||||
return nullptr;
|
||||
|
||||
// The mask value chooses which source operand we need to look at next.
|
||||
Value *SourceOp;
|
||||
int InVecNumElts = Op0->getType()->getVectorNumElements();
|
||||
if (MaskVal < InVecNumElts) {
|
||||
RootElt = MaskVal;
|
||||
SourceOp = Op0;
|
||||
} else {
|
||||
RootElt = MaskVal - InVecNumElts;
|
||||
SourceOp = Op1;
|
||||
}
|
||||
|
||||
// If the source operand is a shuffle itself, look through it to find the
|
||||
// matching root vector.
|
||||
if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) {
|
||||
return foldIdentityShuffles(
|
||||
DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1),
|
||||
SourceShuf->getMask(), RootVec, RootElt, MaxRecurse);
|
||||
}
|
||||
|
||||
// TODO: Look through bitcasts? What if the bitcast changes the vector element
|
||||
// size?
|
||||
|
||||
// The source operand is not a shuffle. Initialize the root vector value for
|
||||
// this shuffle if that has not been done yet.
|
||||
if (!RootVec)
|
||||
RootVec = SourceOp;
|
||||
|
||||
// Give up as soon as a source operand does not match the existing root value.
|
||||
if (RootVec != SourceOp)
|
||||
return nullptr;
|
||||
|
||||
// The element must be coming from the same lane in the source vector
|
||||
// (although it may have crossed lanes in intermediate shuffles).
|
||||
if (RootElt != DestElt)
|
||||
return nullptr;
|
||||
|
||||
return RootVec;
|
||||
}
|
||||
|
||||
static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
|
||||
Type *RetTy, const Query &Q,
|
||||
unsigned MaxRecurse) {
|
||||
@ -4124,7 +4178,28 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
|
||||
OpShuf->getMask()->getSplatValue())
|
||||
return Op1;
|
||||
|
||||
return nullptr;
|
||||
// Don't fold a shuffle with undef mask elements. This may get folded in a
|
||||
// better way using demanded bits or other analysis.
|
||||
// TODO: Should we allow this?
|
||||
for (unsigned i = 0; i != MaskNumElts; ++i)
|
||||
if (ShuffleVectorInst::getMaskValue(Mask, i) == -1)
|
||||
return nullptr;
|
||||
|
||||
// Check if every element of this shuffle can be mapped back to the
|
||||
// corresponding element of a single root vector. If so, we don't need this
|
||||
// shuffle. This handles simple identity shuffles as well as chains of
|
||||
// shuffles that may widen/narrow and/or move elements across lanes and back.
|
||||
Value *RootVec = nullptr;
|
||||
for (unsigned i = 0; i != MaskNumElts; ++i) {
|
||||
// Note that recursion is limited for each vector element, so if any element
|
||||
// exceeds the limit, this will fail to simplify.
|
||||
RootVec = foldIdentityShuffles(i, Op0, Op1, Mask, RootVec, i, MaxRecurse);
|
||||
|
||||
// We can't replace a widening/narrowing shuffle with one of its operands.
|
||||
if (!RootVec || RootVec->getType() != RetTy)
|
||||
return nullptr;
|
||||
}
|
||||
return RootVec;
|
||||
}
|
||||
|
||||
/// Given operands for a ShuffleVectorInst, fold the result or return null.
|
||||
|
@ -120,8 +120,7 @@ define <4 x i32> @undef_mask(<4 x i32> %x) {
|
||||
|
||||
define <4 x i32> @identity_mask_0(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @identity_mask_0(
|
||||
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
|
||||
;
|
||||
%shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
ret <4 x i32> %shuf
|
||||
@ -129,8 +128,7 @@ define <4 x i32> @identity_mask_0(<4 x i32> %x) {
|
||||
|
||||
define <4 x i32> @identity_mask_1(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @identity_mask_1(
|
||||
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> undef, <4 x i32> [[X:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
|
||||
;
|
||||
%shuf = shufflevector <4 x i32> undef, <4 x i32> %x, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
ret <4 x i32> %shuf
|
||||
@ -138,13 +136,32 @@ define <4 x i32> @identity_mask_1(<4 x i32> %x) {
|
||||
|
||||
define <4 x i32> @pseudo_identity_mask(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @pseudo_identity_mask(
|
||||
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
||||
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
|
||||
;
|
||||
%shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
||||
ret <4 x i32> %shuf
|
||||
}
|
||||
|
||||
define <4 x i32> @not_identity_mask(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @not_identity_mask(
|
||||
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
|
||||
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
|
||||
;
|
||||
%shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
|
||||
ret <4 x i32> %shuf
|
||||
}
|
||||
|
||||
; TODO: Should we simplify if the mask has an undef element?
|
||||
|
||||
define <4 x i32> @possible_identity_mask(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @possible_identity_mask(
|
||||
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
|
||||
;
|
||||
%shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
ret <4 x i32> %shuf
|
||||
}
|
||||
|
||||
define <4 x i32> @const_operand(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @const_operand(
|
||||
; CHECK-NEXT: ret <4 x i32> <i32 42, i32 45, i32 44, i32 43>
|
||||
@ -155,10 +172,7 @@ define <4 x i32> @const_operand(<4 x i32> %x) {
|
||||
|
||||
define <4 x i32> @merge(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @merge(
|
||||
; CHECK-NEXT: [[LOWER:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 1, i32 0>
|
||||
; CHECK-NEXT: [[UPPER:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
; CHECK-NEXT: [[MERGED:%.*]] = shufflevector <2 x i32> [[UPPER]], <2 x i32> [[LOWER]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
|
||||
; CHECK-NEXT: ret <4 x i32> [[MERGED]]
|
||||
; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
|
||||
;
|
||||
%lower = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 1, i32 0>
|
||||
%upper = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
@ -166,16 +180,24 @@ define <4 x i32> @merge(<4 x i32> %x) {
|
||||
ret <4 x i32> %merged
|
||||
}
|
||||
|
||||
; This crosses lanes from the source op.
|
||||
|
||||
define <4 x i32> @not_merge(<4 x i32> %x) {
|
||||
; CHECK-LABEL: @not_merge(
|
||||
; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
; CHECK-NEXT: [[U:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
; CHECK-NEXT: [[MERGED:%.*]] = shufflevector <2 x i32> [[U]], <2 x i32> [[L]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
|
||||
; CHECK-NEXT: ret <4 x i32> [[MERGED]]
|
||||
;
|
||||
%l = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
%u = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
%merged = shufflevector <2 x i32> %u, <2 x i32> %l, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
|
||||
ret <4 x i32> %merged
|
||||
}
|
||||
|
||||
define <8 x double> @extract_and_concat(<8 x double> %x) {
|
||||
; CHECK-LABEL: @extract_and_concat(
|
||||
; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x double> [[X:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
; CHECK-NEXT: [[S5:%.*]] = shufflevector <2 x double> [[S1]], <2 x double> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[S6:%.*]] = shufflevector <2 x double> [[S3]], <2 x double> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[S7:%.*]] = shufflevector <4 x double> [[S5]], <4 x double> [[S6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: ret <8 x double> [[S7]]
|
||||
; CHECK-NEXT: ret <8 x double> [[X:%.*]]
|
||||
;
|
||||
%s1 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%s2 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
@ -191,14 +213,7 @@ define <8 x double> @extract_and_concat(<8 x double> %x) {
|
||||
|
||||
define <8 x i64> @PR30630(<8 x i64> %x) {
|
||||
; CHECK-LABEL: @PR30630(
|
||||
; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i64> [[X:%.*]], <8 x i64> undef, <2 x i32> <i32 0, i32 4>
|
||||
; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 1, i32 5>
|
||||
; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 2, i32 6>
|
||||
; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 3, i32 7>
|
||||
; CHECK-NEXT: [[S5:%.*]] = shufflevector <2 x i64> [[S1]], <2 x i64> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[S6:%.*]] = shufflevector <2 x i64> [[S3]], <2 x i64> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; CHECK-NEXT: [[S7:%.*]] = shufflevector <4 x i64> [[S5]], <4 x i64> [[S6]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
|
||||
; CHECK-NEXT: ret <8 x i64> [[S7]]
|
||||
; CHECK-NEXT: ret <8 x i64> [[X:%.*]]
|
||||
;
|
||||
%s1 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
|
||||
%s2 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 1, i32 5>
|
||||
|
Loading…
x
Reference in New Issue
Block a user