[X86][AVX] Peek through bitcasts to find the source of broadcasts

AVX1 can only broadcast vectors as floats/doubles, so for 256-bit vectors we insert bitcasts if we are shuffling v8i32/v4i64 types. Unfortunately the presence of these bitcasts prevents the current broadcast lowering code from peeking through cases where we have concatenated / extracted vectors to create the 256-bit vectors.

This patch allows us to peek through bitcasts as long as the number of elements doesn't change (i.e. element bitwidth is the same) so the broadcast index is not affected.

Note this bitcast peek is different from the stage later on which doesn't care about the type and is just trying to find a load node.

Differential Revision: http://reviews.llvm.org/D21660

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273848 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Simon Pilgrim 2016-06-27 07:44:32 +00:00
parent baa0bfe5d8
commit 7c1d489b88
3 changed files with 12 additions and 12 deletions

View File

@ -8510,6 +8510,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
SDValue V = V1;
for (;;) {
switch (V.getOpcode()) {
case ISD::BITCAST: {
SDValue VSrc = V.getOperand(0);
if (NumElts != VSrc.getSimpleValueType().getVectorNumElements())
break;
V = VSrc;
continue;
}
case ISD::CONCAT_VECTORS: {
int OperandSize = Mask.size() / V.getNumOperands();
V = V.getOperand(BroadcastIdx / OperandSize);

View File

@ -173,14 +173,12 @@ define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtabl
; X32-LABEL: load_splat_8i32_4i32_33333333:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: vbroadcastss 12(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_8i32_4i32_33333333:
; X64: ## BB#0: ## %entry
; X64-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: vbroadcastss 12(%rdi), %ymm0
; X64-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
@ -277,16 +275,12 @@ define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable re
; X32-LABEL: load_splat_4i64_2i64_1111:
; X32: ## BB#0: ## %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vmovaps (%eax), %xmm0
; X32-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: vbroadcastsd 8(%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: load_splat_4i64_2i64_1111:
; X64: ## BB#0: ## %entry
; X64-NEXT: vmovaps (%rdi), %xmm0
; X64-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: vbroadcastsd 8(%rdi), %ymm0
; X64-NEXT: retq
entry:
%ld = load <2 x i64>, <2 x i64>* %ptr

View File

@ -1320,8 +1320,7 @@ define <4 x double> @splat_v4f64(<2 x double> %r) {
define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_mem_v4i64_from_v2i64: