mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 13:40:30 +00:00
[AVX512] Add PALIGNR shuffle lowering for v32i16 and v16i32.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271870 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5cc4ee2898
commit
856b53e006
@ -7679,6 +7679,8 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1,
|
||||
|
||||
// SSSE3 targets can use the palignr instruction.
|
||||
if (Subtarget.hasSSSE3()) {
|
||||
assert((!VT.is512BitVector() || Subtarget.hasBWI()) &&
|
||||
"512-bit PALIGNR requires BWI instructions");
|
||||
// Cast the inputs to i8 vector of correct length to match PALIGNR.
|
||||
MVT AlignVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes);
|
||||
Lo = DAG.getBitcast(AlignVT, Lo);
|
||||
@ -11760,6 +11762,12 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
|
||||
return Unpck;
|
||||
|
||||
// Try to use byte rotation instructions.
|
||||
if (Subtarget.hasBWI())
|
||||
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
|
||||
DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
|
||||
return Rotate;
|
||||
|
||||
return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
|
||||
}
|
||||
|
||||
@ -11775,6 +11783,11 @@ static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
|
||||
assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
|
||||
|
||||
// Try to use byte rotation instructions.
|
||||
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
|
||||
DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG))
|
||||
return Rotate;
|
||||
|
||||
return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,4 @@
|
||||
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
|
||||
@ -106,9 +105,7 @@ define i32 @sad_16i8() nounwind {
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: movl $1, %eax
|
||||
; AVX512BW-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpalignr $4, %zmm0, %zmm0, %zmm1
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
@ -359,9 +356,7 @@ define i32 @sad_32i8() nounwind {
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: movl $1, %eax
|
||||
; AVX512BW-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpalignr $4, %zmm0, %zmm0, %zmm1
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
@ -846,9 +841,7 @@ define i32 @sad_avx64i8() nounwind {
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7]
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: movl $1, %eax
|
||||
; AVX512BW-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm1
|
||||
; AVX512BW-NEXT: vpalignr $4, %zmm0, %zmm0, %zmm1
|
||||
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: vmovd %xmm0, %eax
|
||||
; AVX512BW-NEXT: retq
|
||||
|
@ -13,9 +13,7 @@ define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_
|
||||
;
|
||||
; AVX512BW-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: movl $1, %eax
|
||||
; AVX512BW-NEXT: vmovd %eax, %xmm1
|
||||
; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
|
||||
; AVX512BW-NEXT: vpalignr $2, %zmm0, %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
|
||||
|
Loading…
Reference in New Issue
Block a user