mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-10 06:03:52 +00:00
[AVX-512] Add patterns to allow EVEX encoded stores of v16i16/v8i16/v16i8/v32i8 even when BWI is not supported.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278317 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bc13ca6202
commit
e0ce251bcc
@ -2793,6 +2793,28 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
|
||||
(v16i32 VR512:$src))),
|
||||
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
|
||||
|
||||
let Predicates = [HasVLX, NoBWI] in {
|
||||
// 128-bit stores of v8i16/v16i8 without BWI.
|
||||
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
|
||||
(VMOVDQA32Z128mr addr:$dst, VR128:$src)>;
|
||||
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
|
||||
(VMOVDQA32Z128mr addr:$dst, VR128:$src)>;
|
||||
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
|
||||
(VMOVDQU32Z128mr addr:$dst, VR128:$src)>;
|
||||
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
||||
(VMOVDQU32Z128mr addr:$dst, VR128:$src)>;
|
||||
|
||||
// 256-bit stores of v16i16/v32i8 without BWI.
|
||||
def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
|
||||
(VMOVDQA32Z256mr addr:$dst, VR256:$src)>;
|
||||
def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
|
||||
(VMOVDQA32Z256mr addr:$dst, VR256:$src)>;
|
||||
def : Pat<(store (v16i16 VR256:$src), addr:$dst),
|
||||
(VMOVDQU32Z256mr addr:$dst, VR256:$src)>;
|
||||
def : Pat<(store (v32i8 VR256:$src), addr:$dst),
|
||||
(VMOVDQU32Z256mr addr:$dst, VR256:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasVLX] in {
|
||||
// Special patterns for storing subvector extracts of lower 128-bits of 256.
|
||||
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
|
||||
|
@ -1028,7 +1028,7 @@ let Predicates = [HasAVX, NoVLX] in {
|
||||
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
// 128-bit load/store
|
||||
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
|
||||
(VMOVAPSmr addr:$dst, VR128:$src)>;
|
||||
|
@ -4151,7 +4151,7 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
|
||||
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||
; AVX512VL-NEXT: vmovdqa %xmm0, (%r14)
|
||||
; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14)
|
||||
; AVX512VL-NEXT: addq $16, %rsp
|
||||
; AVX512VL-NEXT: popq %rbx
|
||||
; AVX512VL-NEXT: popq %r14
|
||||
@ -5944,7 +5944,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
|
||||
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||
; AVX512VL-NEXT: vmovdqa %xmm0, (%r14)
|
||||
; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14)
|
||||
; AVX512VL-NEXT: addq $32, %rsp
|
||||
; AVX512VL-NEXT: popq %rbx
|
||||
; AVX512VL-NEXT: popq %r14
|
||||
|
Loading…
Reference in New Issue
Block a user