[X86][AVX512] Only combine EVEX target shuffles to shuffles of the same number of vector elements
Over-eager combining prevents the correct folding of writemasks. At the moment this occurs for ALL EVEX shuffles; in the future we need to check that the user of the root shuffle is a VSELECT that can fold to a writemask.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279934 91177308-0d34-0410-b5e6-96231b3b80d8
commit 337ddd9188 (parent afa0d1049b)
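To make the writemask interaction concrete, here is a minimal standalone C++ model (illustrative names and values, not LLVM code) of what an EVEX instruction with a writemask computes: per element, result[i] = mask[i] ? shuffled[i] : passthru[i], i.e. exactly the VSELECT-of-a-shuffle pattern the message refers to. The mask carries one bit per element of the instruction's type, which is why a combined shuffle must keep the root's element count for the mask to stay foldable.

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

// Scalar model (not LLVM code) of an EVEX shuffle with a writemask:
//   Result[I] = WriteMask[I] ? Src[ShuffleMask[I]] : PassThru[I]
// i.e. a VSELECT of the shuffle result, with one mask bit per element of
// the instruction's type.
template <typename T, std::size_t N>
std::array<T, N> maskedShuffle(const std::array<T, N> &Src,
                               const std::array<int, N> &ShuffleMask,
                               const std::array<bool, N> &WriteMask,
                               const std::array<T, N> &PassThru) {
  std::array<T, N> Result{};
  for (std::size_t I = 0; I != N; ++I)
    Result[I] = WriteMask[I] ? Src[ShuffleMask[I]] : PassThru[I];
  return Result;
}

int main() {
  std::array<int16_t, 8> Src{1, 2, 3, 4, 5, 6, 7, 8}, Zero{};
  std::array<int, 8> Shuf{4, 5, 6, 7, 0, 0, 0, 0}; // move high half down
  std::array<bool, 8> Keep{true, true, true, true, false, false, false, false};
  auto R = maskedShuffle(Src, Shuf, Keep, Zero);
  assert(R[0] == 5 && R[4] == 0 && R[7] == 0); // shuffle + select in one op
  return 0;
}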
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -25287,6 +25287,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   }
 
   unsigned RootSizeInBits = RootVT.getSizeInBits();
+  unsigned NumRootElts = RootVT.getVectorNumElements();
   unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
   bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
                      (RootVT.is256BitVector() && !Subtarget.hasAVX2());
@@ -25297,11 +25298,10 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
 
   // TODO - this currently prevents all lane shuffles from occurring.
   // TODO - check for writemasks usage instead of always preventing combining.
   // TODO - attempt to narrow Mask back to writemask size.
-  if (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits &&
-      (RootSizeInBits == 512 ||
-       (Subtarget.hasVLX() && RootSizeInBits >= 128))) {
+  bool IsEVEXShuffle =
+      RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
+  if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits))
     return false;
-  }
 
   // TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
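The new IsEVEXShuffle predicate and its early bail-out can be restated outside of LLVM. A minimal sketch, with plain integers standing in for the MVT queries (the helper name blocksCombine is hypothetical):

#include <cstdio>

// Illustrative restatement of the new guard (not LLVM code). An "EVEX
// shuffle" is any 512-bit shuffle, or any 128/256-bit shuffle on a
// VLX-capable target, since those select EVEX-encoded instructions whose
// writemask granularity is fixed by the element type.
static bool blocksCombine(unsigned RootSizeInBits,
                          unsigned RootScalarSizeInBits,
                          unsigned BaseMaskEltSizeInBits, bool HasVLX) {
  bool IsEVEXShuffle =
      RootSizeInBits == 512 || (HasVLX && RootSizeInBits >= 128);
  return IsEVEXShuffle && RootScalarSizeInBits != BaseMaskEltSizeInBits;
}

int main() {
  // A v8i16 root (128-bit, 16-bit scalars) with an 8-bit base mask element
  // is rejected on a VLX target: 16 != 8 would break writemask reuse.
  printf("%d\n", blocksCombine(128, 16, 8, /*HasVLX=*/true)); // 1
  // The same combine stays legal on a non-VLX (VEX-encoded) target.
  printf("%d\n", blocksCombine(128, 16, 8, /*HasVLX=*/false)); // 0
  return 0;
}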
@@ -25370,6 +25370,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleVT)) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return false; // Nothing to do!
+    if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+      return false; // AVX512 Writemask clash.
     Res = DAG.getBitcast(ShuffleVT, V1);
     DCI.AddToWorklist(Res.getNode());
     Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
@@ -25383,6 +25385,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
                                     ShuffleVT, PermuteImm)) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return false; // Nothing to do!
+    if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+      return false; // AVX512 Writemask clash.
     Res = DAG.getBitcast(ShuffleVT, V1);
     DCI.AddToWorklist(Res.getNode());
     Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
@@ -25398,6 +25402,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
                                  ShuffleVT)) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return false; // Nothing to do!
+    if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+      return false; // AVX512 Writemask clash.
     V1 = DAG.getBitcast(ShuffleVT, V1);
     DCI.AddToWorklist(V1.getNode());
     V2 = DAG.getBitcast(ShuffleVT, V2);
@@ -25413,6 +25419,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
                                         Shuffle, ShuffleVT, PermuteImm)) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return false; // Nothing to do!
+    if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+      return false; // AVX512 Writemask clash.
     V1 = DAG.getBitcast(ShuffleVT, V1);
     DCI.AddToWorklist(V1.getNode());
     V2 = DAG.getBitcast(ShuffleVT, V2);
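Each match site now repeats the element-count form of the same guard. A worked example with concrete numbers (plain values standing in for the MVT queries; not LLVM code):

#include <cassert>

// PSRLDQ is defined on v16i8, so combining a v8i16 root shuffle into it
// would change the element count from 8 to 16. On an EVEX target the
// combine is rejected: a writemask sized for 8 x i16 elements cannot be
// reused by a 16 x i8 operation.
int main() {
  const unsigned NumRootElts = 8;     // root type v8i16
  const unsigned NumShuffleElts = 16; // candidate ShuffleVT v16i8
  const bool IsEVEXShuffle = true;    // e.g. a 128-bit shuffle with VLX
  assert(IsEVEXShuffle && NumRootElts != NumShuffleElts); // writemask clash
  return 0;
}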
--- a/test/CodeGen/X86/vector-half-conversions.ll
+++ b/test/CodeGen/X86/vector-half-conversions.ll
@@ -3241,7 +3241,8 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind {
 ; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; AVX512VL-NEXT:    addq $24, %rsp
 ; AVX512VL-NEXT:    popq %rbx
 ; AVX512VL-NEXT:    popq %r14
@@ -4148,7 +4149,8 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwind {
 ; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; AVX512VL-NEXT:    vmovdqa32 %xmm0, (%r14)
 ; AVX512VL-NEXT:    addq $16, %rsp
 ; AVX512VL-NEXT:    popq %rbx
@@ -5136,7 +5138,8 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
 ; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; AVX512VL-NEXT:    addq $40, %rsp
 ; AVX512VL-NEXT:    popq %rbx
 ; AVX512VL-NEXT:    popq %r14
@@ -5939,7 +5942,8 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounwind {
 ; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
 ; AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
 ; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; AVX512VL-NEXT:    vmovdqa32 %xmm0, (%r14)
 ; AVX512VL-NEXT:    addq $32, %rsp
 ; AVX512VL-NEXT:    popq %rbx
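The test churn follows directly from that guard: the v16i8 VPSRLDQ ending is no longer reachable for these v8i16 roots on AVX512VL, so codegen falls back to zeroing a register and using a v2i64 VPUNPCKHQDQ. A standalone scalar model (illustrative; byte arrays in place of XMM registers) showing that the two endings compute the same value:

#include <cassert>
#include <cstdint>
#include <cstring>

// Old ending: vpsrldq $8 shifts the high 8 bytes of xmm0 into the low half
// and zero-fills the top -- a single 16-element (v16i8) byte shuffle.
static void srldq8(uint8_t V[16]) {
  memmove(V, V + 8, 8);
  memset(V + 8, 0, 8);
}

// New ending: vpxord zeros xmm1, then vpunpckhqdq builds
// { high qword of xmm0, high qword of xmm1 } -- a 2-element (v2i64) shuffle.
static void punpckhqdqWithZero(uint8_t V[16]) {
  uint8_t Zero[16] = {0};
  memcpy(V, V + 8, 8);        // low qword  = high qword of V
  memcpy(V + 8, Zero + 8, 8); // high qword = high qword of the zero vector
}

int main() {
  uint8_t A[16], B[16];
  for (int I = 0; I != 16; ++I)
    A[I] = B[I] = uint8_t(I + 1);
  srldq8(A);
  punpckhqdqWithZero(B);
  assert(memcmp(A, B, 16) == 0); // both move the high qword down, zero the top
  return 0;
}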