mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-08 13:00:43 +00:00
[X86] Bitcast subvector before broadcasting it.
Since r274013, we've been looking through bitcasts on broadcast inputs. In the scalar-folding case (where the input comes from a load, a build_vector, or a scalar_to_vector), the input type didn't matter, as we'd simply bitcast the resulting scalar back. However, when broadcasting a 128-bit-lane-aligned element, we create an EXTRACT_SUBVECTOR, and that node must be typed consistently with its input. Use the proper types by creating the EXTRACT_SUBVECTOR with the element type of the original (pre-bitcast) input vector.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294774 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0aa6910479
commit
d0491a6b56
@ -9687,7 +9687,16 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
|
||||
if (((BroadcastIdx * EltSize) % 128) != 0)
|
||||
return SDValue();
|
||||
|
||||
MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 128 / EltSize);
|
||||
// The shuffle input might have been a bitcast we looked through; look at
|
||||
// the original input vector. Emit an EXTRACT_SUBVECTOR of that type; we'll
|
||||
// later bitcast it to BroadcastVT.
|
||||
MVT SrcVT = V.getSimpleValueType();
|
||||
assert(SrcVT.getScalarSizeInBits() == BroadcastVT.getScalarSizeInBits() &&
|
||||
"Unexpected vector element size");
|
||||
assert(SrcVT.getVectorNumElements() == BroadcastVT.getVectorNumElements() &&
|
||||
"Unexpected vector num elements");
|
||||
|
||||
MVT ExtVT = MVT::getVectorVT(SrcVT.getScalarType(), 128 / EltSize);
|
||||
V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V,
|
||||
DAG.getIntPtrConstant(BroadcastIdx, DL));
|
||||
}
|
||||
|
@ -129,6 +129,48 @@ define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
; Splat element 2 of %a into all four result elements (shuffle mask <2,2,2,2>;
; %b is not referenced by the mask). Element 2 starts at bit offset 128, i.e.
; it is the first element of the upper 128-bit half of the 256-bit input.
define <4 x double> @shuffle_v4f64_2222(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_2222:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4f64_2222:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v4f64_2222:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
|
||||
; AVX512VL-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
; Same <2,2,2,2> splat as a <4 x double> shuffle, but both shuffle operands are
; bitcasts of <4 x i64> arguments, so the value being broadcast originates from
; a vector whose element type (i64) differs from the shuffle's (double).
define <4 x double> @shuffle_v4f64_2222_bc(<4 x i64> %a, <4 x i64> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_2222_bc:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v4f64_2222_bc:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v4f64_2222_bc:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
|
||||
; AVX512VL-NEXT: retq
|
||||
%tmp0 = bitcast <4 x i64> %a to <4 x double>
|
||||
%tmp1 = bitcast <4 x i64> %b to <4 x double>
|
||||
%shuffle = shufflevector <4 x double> %tmp0, <4 x double> %tmp1, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
|
||||
ret <4 x double> %shuffle
|
||||
}
|
||||
|
||||
define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: shuffle_v4f64_3330:
|
||||
; AVX1: # BB#0:
|
||||
|
@ -2048,6 +2048,24 @@ define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
|
||||
ret <8 x i32> %shuffle
|
||||
}
|
||||
|
||||
; Splat element 4 of an <8 x i32> vector whose shuffle operands are bitcasts of
; <8 x float> arguments. Element 4 starts at bit offset 128 (the first element
; of the upper 128-bit half); the AVX2/AVX512VL checks expect an extract of that
; half followed by a broadcast.
define <8 x i32> @shuffle_v8i32_44444444_bc(<8 x float> %a, <8 x float> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_44444444_bc:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
|
||||
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2OR512VL-LABEL: shuffle_v8i32_44444444_bc:
|
||||
; AVX2OR512VL: # BB#0:
|
||||
; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
|
||||
; AVX2OR512VL-NEXT: retq
|
||||
%tmp0 = bitcast <8 x float> %a to <8 x i32>
|
||||
%tmp1 = bitcast <8 x float> %b to <8 x i32>
|
||||
%shuffle = shufflevector <8 x i32> %tmp0, <8 x i32> %tmp1, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
||||
ret <8 x i32> %shuffle
|
||||
}
|
||||
|
||||
define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
|
||||
; AVX1-LABEL: shuffle_v8i32_5555uuuu:
|
||||
; AVX1: # BB#0:
|
||||
|
@ -23,6 +23,18 @@ define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08
|
||||
ret <16 x float> %shuffle
|
||||
}
|
||||
|
||||
; Splat element 8 of a <16 x float> vector whose shuffle operands are bitcasts
; of <16 x i32> arguments. Element 8 starts at bit offset 256, i.e. the first
; element of 128-bit lane 2; the checks expect that lane to be extracted with
; an integer-domain extract and then broadcast.
define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc(<16 x i32> %a, <16 x i32> %b) {
|
||||
; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vextracti32x4 $2, %zmm0, %xmm0
|
||||
; ALL-NEXT: vpbroadcastd %xmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
%tmp0 = bitcast <16 x i32> %a to <16 x float>
|
||||
%tmp1 = bitcast <16 x i32> %b to <16 x float>
|
||||
%shuffle = shufflevector <16 x float> %tmp0, <16 x float> %tmp1, <16 x i32><i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
|
||||
ret <16 x float> %shuffle
|
||||
}
|
||||
|
||||
define <16 x float> @shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x float> %a, <16 x float> %b) {
|
||||
; ALL-LABEL: shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
|
||||
; ALL: # BB#0:
|
||||
|
@ -48,6 +48,24 @@ define <8 x double> @shuffle_v8f64_44444444(<8 x double> %a, <8 x double> %b) {
|
||||
ret <8 x double> %shuffle
|
||||
}
|
||||
|
||||
; Splat element 4 of an <8 x double> vector whose shuffle operands are bitcasts
; of <8 x i64> arguments. Element 4 starts at bit offset 256, i.e. the first
; element of 128-bit lane 2; both the 64-bit and 32-bit target checks expect
; that lane to be extracted and then broadcast.
define <8 x double> @shuffle_v8f64_44444444_bc(<8 x i64> %a, <8 x i64> %b) {
|
||||
; AVX512F-LABEL: shuffle_v8f64_44444444_bc:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm0
|
||||
; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512F-32-LABEL: shuffle_v8f64_44444444_bc:
|
||||
; AVX512F-32: # BB#0:
|
||||
; AVX512F-32-NEXT: vextracti32x4 $2, %zmm0, %xmm0
|
||||
; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0
|
||||
; AVX512F-32-NEXT: retl
|
||||
%tmp0 = bitcast <8 x i64> %a to <8 x double>
|
||||
%tmp1 = bitcast <8 x i64> %b to <8 x double>
|
||||
%shuffle = shufflevector <8 x double> %tmp0, <8 x double> %tmp1, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
|
||||
ret <8 x double> %shuffle
|
||||
}
|
||||
|
||||
define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
|
||||
; AVX512F-LABEL: shuffle_v8f64_00000010:
|
||||
; AVX512F: # BB#0:
|
||||
|
Loading…
Reference in New Issue
Block a user