[InstSimplify] fold identity shuffles (recursing if needed)

This patch simplifies the examples from D31509 and D31927 (PR30630) and catches 
the basic identity shuffle tests that Zvi recently added.

I'm not sure if we have something like this in DAGCombiner, but we should?

It's worth noting that "MaxRecurse / RecursionLimit" is only 3 on entry at the moment. 
We might want to bump that up if there are longer shuffle chains like this in the wild.

For now, we're ignoring shuffles that have undef mask elements because it's not
clear how those should be handled.

Differential Revision: https://reviews.llvm.org/D31960


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300714 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjay Patel 2017-04-19 16:48:22 +00:00
parent af30bb9796
commit 19e4f8f1d9
2 changed files with 117 additions and 27 deletions

View File

@ -4080,6 +4080,60 @@ Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
RecursionLimit);
}
/// For the given destination element of a shuffle, peek through shuffles to
/// match a root vector source operand that contains that element in the same
/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s).
static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
Constant *Mask, Value *RootVec, int RootElt,
unsigned MaxRecurse) {
if (!MaxRecurse--)
return nullptr;
// Bail out if any mask value is undefined. That kind of shuffle may be
// simplified further based on demanded bits or other folds.
int MaskVal = ShuffleVectorInst::getMaskValue(Mask, RootElt);
if (MaskVal == -1)
return nullptr;
// The mask value chooses which source operand we need to look at next.
Value *SourceOp;
int InVecNumElts = Op0->getType()->getVectorNumElements();
if (MaskVal < InVecNumElts) {
RootElt = MaskVal;
SourceOp = Op0;
} else {
RootElt = MaskVal - InVecNumElts;
SourceOp = Op1;
}
// If the source operand is a shuffle itself, look through it to find the
// matching root vector.
if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) {
return foldIdentityShuffles(
DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1),
SourceShuf->getMask(), RootVec, RootElt, MaxRecurse);
}
// TODO: Look through bitcasts? What if the bitcast changes the vector element
// size?
// The source operand is not a shuffle. Initialize the root vector value for
// this shuffle if that has not been done yet.
if (!RootVec)
RootVec = SourceOp;
// Give up as soon as a source operand does not match the existing root value.
if (RootVec != SourceOp)
return nullptr;
// The element must be coming from the same lane in the source vector
// (although it may have crossed lanes in intermediate shuffles).
if (RootElt != DestElt)
return nullptr;
return RootVec;
}
static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
Type *RetTy, const Query &Q,
unsigned MaxRecurse) {
@ -4124,7 +4178,28 @@ static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask,
OpShuf->getMask()->getSplatValue())
return Op1;
return nullptr;
// Don't fold a shuffle with undef mask elements. This may get folded in a
// better way using demanded bits or other analysis.
// TODO: Should we allow this?
for (unsigned i = 0; i != MaskNumElts; ++i)
if (ShuffleVectorInst::getMaskValue(Mask, i) == -1)
return nullptr;
// Check if every element of this shuffle can be mapped back to the
// corresponding element of a single root vector. If so, we don't need this
// shuffle. This handles simple identity shuffles as well as chains of
// shuffles that may widen/narrow and/or move elements across lanes and back.
Value *RootVec = nullptr;
for (unsigned i = 0; i != MaskNumElts; ++i) {
// Note that recursion is limited for each vector element, so if any element
// exceeds the limit, this will fail to simplify.
RootVec = foldIdentityShuffles(i, Op0, Op1, Mask, RootVec, i, MaxRecurse);
// We can't replace a widening/narrowing shuffle with one of its operands.
if (!RootVec || RootVec->getType() != RetTy)
return nullptr;
}
return RootVec;
}
/// Given operands for a ShuffleVectorInst, fold the result or return null.

View File

@ -120,8 +120,7 @@ define <4 x i32> @undef_mask(<4 x i32> %x) {
define <4 x i32> @identity_mask_0(<4 x i32> %x) {
; CHECK-LABEL: @identity_mask_0(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %shuf
@ -129,8 +128,7 @@ define <4 x i32> @identity_mask_0(<4 x i32> %x) {
define <4 x i32> @identity_mask_1(<4 x i32> %x) {
; CHECK-LABEL: @identity_mask_1(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> undef, <4 x i32> [[X:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%shuf = shufflevector <4 x i32> undef, <4 x i32> %x, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i32> %shuf
@ -138,13 +136,32 @@ define <4 x i32> @identity_mask_1(<4 x i32> %x) {
define <4 x i32> @pseudo_identity_mask(<4 x i32> %x) {
; CHECK-LABEL: @pseudo_identity_mask(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x i32> %shuf
}
define <4 x i32> @not_identity_mask(<4 x i32> %x) {
; CHECK-LABEL: @not_identity_mask(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 2, i32 6>
ret <4 x i32> %shuf
}
; TODO: Should we simplify if the mask has an undef element?
define <4 x i32> @possible_identity_mask(<4 x i32> %x) {
; CHECK-LABEL: @possible_identity_mask(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT: ret <4 x i32> [[SHUF]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
ret <4 x i32> %shuf
}
define <4 x i32> @const_operand(<4 x i32> %x) {
; CHECK-LABEL: @const_operand(
; CHECK-NEXT: ret <4 x i32> <i32 42, i32 45, i32 44, i32 43>
@ -155,10 +172,7 @@ define <4 x i32> @const_operand(<4 x i32> %x) {
define <4 x i32> @merge(<4 x i32> %x) {
; CHECK-LABEL: @merge(
; CHECK-NEXT: [[LOWER:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: [[UPPER:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[MERGED:%.*]] = shufflevector <2 x i32> [[UPPER]], <2 x i32> [[LOWER]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
; CHECK-NEXT: ret <4 x i32> [[MERGED]]
; CHECK-NEXT: ret <4 x i32> [[X:%.*]]
;
%lower = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 1, i32 0>
%upper = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@ -166,16 +180,24 @@ define <4 x i32> @merge(<4 x i32> %x) {
ret <4 x i32> %merged
}
; This crosses lanes from the source op.
define <4 x i32> @not_merge(<4 x i32> %x) {
; CHECK-LABEL: @not_merge(
; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[U:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[MERGED:%.*]] = shufflevector <2 x i32> [[U]], <2 x i32> [[L]], <4 x i32> <i32 3, i32 2, i32 0, i32 1>
; CHECK-NEXT: ret <4 x i32> [[MERGED]]
;
%l = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
%u = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%merged = shufflevector <2 x i32> %u, <2 x i32> %l, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
ret <4 x i32> %merged
}
define <8 x double> @extract_and_concat(<8 x double> %x) {
; CHECK-LABEL: @extract_and_concat(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x double> [[X:%.*]], <8 x double> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 4, i32 5>
; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> <i32 6, i32 7>
; CHECK-NEXT: [[S5:%.*]] = shufflevector <2 x double> [[S1]], <2 x double> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[S6:%.*]] = shufflevector <2 x double> [[S3]], <2 x double> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[S7:%.*]] = shufflevector <4 x double> [[S5]], <4 x double> [[S6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x double> [[S7]]
; CHECK-NEXT: ret <8 x double> [[X:%.*]]
;
%s1 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%s2 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> <i32 2, i32 3>
@ -191,14 +213,7 @@ define <8 x double> @extract_and_concat(<8 x double> %x) {
define <8 x i64> @PR30630(<8 x i64> %x) {
; CHECK-LABEL: @PR30630(
; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i64> [[X:%.*]], <8 x i64> undef, <2 x i32> <i32 0, i32 4>
; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 1, i32 5>
; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 2, i32 6>
; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> <i32 3, i32 7>
; CHECK-NEXT: [[S5:%.*]] = shufflevector <2 x i64> [[S1]], <2 x i64> [[S2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[S6:%.*]] = shufflevector <2 x i64> [[S3]], <2 x i64> [[S4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[S7:%.*]] = shufflevector <4 x i64> [[S5]], <4 x i64> [[S6]], <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: ret <8 x i64> [[S7]]
; CHECK-NEXT: ret <8 x i64> [[X:%.*]]
;
%s1 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 0, i32 4>
%s2 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 1, i32 5>