[X86][AVX] Match broadcast loads through a bitcast
AVX1 v8i32/v4i64 shuffles are bitcast to v8f32/v4f64; this patch peeks through those bitcasts to check for a load node, allowing broadcasts to occur. Follow-up to D15310.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257055 91177308-0d34-0410-b5e6-96231b3b80d8
parent db2b2ca62a
commit 9233e73bf3
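For orientation before the diff: the heart of the change is a short loop that strips bitcasts off the shuffle source before the existing load checks run. Below is a minimal, self-contained C++ sketch of that pattern; Node, Opcode, peekThroughBitcasts and canBroadcastFromMemory are invented stand-ins for illustration, not LLVM's SDValue/ISD API.

#include <iostream>

// Toy stand-ins for SelectionDAG nodes (hypothetical, for illustration only).
enum class Opcode { Load, Bitcast, Other };

struct Node {
  Opcode Op;
  const Node *Operand;  // a single operand is enough for this sketch
};

// Mirrors the loop added by the patch:
//   SDValue BC = V;
//   while (BC.getOpcode() == ISD::BITCAST)
//     BC = BC.getOperand(0);
const Node *peekThroughBitcasts(const Node *V) {
  while (V->Op == Opcode::Bitcast)
    V = V->Operand;
  return V;
}

// A broadcast can be folded with memory only if the (bitcast-stripped)
// source really is a load.
bool canBroadcastFromMemory(const Node *V) {
  return peekThroughBitcasts(V)->Op == Opcode::Load;
}

int main() {
  Node Load{Opcode::Load, nullptr};
  Node Cast{Opcode::Bitcast, &Load};   // e.g. a v8i32 load bitcast to v8f32
  Node Arith{Opcode::Other, nullptr};  // not a load at all

  std::cout << canBroadcastFromMemory(&Cast) << '\n';  // 1: load found behind the bitcast
  std::cout << canBroadcastFromMemory(&Load) << '\n';  // 1: a plain load still matches
  std::cout << canBroadcastFromMemory(&Arith) << '\n'; // 0: no load, no broadcast fold
}

In the actual patch the stripped value (BC) is then used both in the MayFoldLoad check and when casting to LoadSDNode, as the second hunk below shows; the test updates demonstrate the payoff, with a vmovaps/vextractf128/shuffle/vinsertf128 sequence collapsing into a single vbroadcastss or vbroadcastsd.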
@@ -8163,6 +8163,11 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
     break;
   }
 
+  // Peek through any bitcast (only useful for loads).
+  SDValue BC = V;
+  while (BC.getOpcode() == ISD::BITCAST)
+    BC = BC.getOperand(0);
+
   // Check if this is a broadcast of a scalar. We special case lowering
   // for scalars so that we can more effectively fold with loads.
   // First, look through bitcast: if the original value has a larger element
@@ -8182,10 +8187,10 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
     // Only AVX2 has register broadcasts.
     if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
       return SDValue();
-  } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+  } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
     // If we are broadcasting a load that is only used by the shuffle
     // then we can reduce the vector load to the broadcasted scalar load.
-    LoadSDNode *Ld = cast<LoadSDNode>(V);
+    LoadSDNode *Ld = cast<LoadSDNode>(BC);
     SDValue BaseAddr = Ld->getOperand(1);
     EVT AddrVT = BaseAddr.getValueType();
     EVT SVT = VT.getScalarType();
@@ -3,9 +3,7 @@
 define void @endless_loop() {
 ; CHECK-LABEL: endless_loop:
 ; CHECK-NEXT:  # BB#0:
-; CHECK-NEXT:    vmovaps (%eax), %ymm0
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT:    vbroadcastss (%eax), %ymm0
 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
@@ -130,10 +130,7 @@ entry:
 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: load_splat_8i32_8i32_55555555:
 ; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    vmovaps (%rdi), %ymm0
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT:    vbroadcastss 20(%rdi), %ymm0
 ; CHECK-NEXT:    retq
 entry:
   %ld = load <8 x i32>, <8 x i32>* %ptr
@@ -201,10 +198,7 @@ entry:
 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
 ; CHECK-LABEL: load_splat_4i64_4i64_2222:
 ; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    vmovapd (%rdi), %ymm0
-; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
 ; CHECK-NEXT:    retq
 entry:
   %ld = load <4 x i64>, <4 x i64>* %ptr