mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-13 14:47:00 +00:00
[X86][SSE] When lowering a 256-bit shuffle as PMOVZX, reduce the input vector to the lower 128-bit subvector.
More often than not, this is what the input started out as; the extraction is zero-cost on AVX, and the PMOVZX/PMOVSX folding logic is based around 128-bit loads. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@270858 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
32afd6e7e7
commit
1f433c478c
@ -7915,7 +7915,13 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
|
||||
return SDValue();
|
||||
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
|
||||
NumElements / Scale);
|
||||
InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, ShuffleOffset(InputV));
|
||||
InputV = ShuffleOffset(InputV);
|
||||
|
||||
// For 256-bit vectors, we only need the lower (128-bit) input half.
|
||||
if (VT.is256BitVector())
|
||||
InputV = extract128BitVector(InputV, 0, DAG, DL);
|
||||
|
||||
InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV);
|
||||
return DAG.getBitcast(VT, InputV);
|
||||
}
|
||||
|
||||
|
@ -707,14 +707,12 @@ define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) {
|
||||
;
|
||||
; AVX2-LABEL: load_zext_16i8_to_8i32:
|
||||
; AVX2: # BB#0: # %entry
|
||||
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: load_zext_16i8_to_8i32:
|
||||
; AVX512: # BB#0: # %entry
|
||||
; AVX512-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
|
||||
; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
|
||||
; AVX512-NEXT: retq
|
||||
entry:
|
||||
%X = load <16 x i8>, <16 x i8>* %ptr
|
||||
|
Loading…
Reference in New Issue
Block a user