mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-22 14:05:03 +00:00
[X86] AVX512: Add non-temporal stores
Note that I followed the AVX2 convention here and didn't add LLVM intrinsics for stores. These can be generated with the nontemporal hint on LLVM IR stores (see new test). The GCC builtins are lowered directly into nontemporal stores. <rdar://problem/17082571> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211176 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7fc69597b7
commit
f1b790f791
@ -1800,6 +1800,35 @@ def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst),
|
||||
(int_x86_avx512_movntdqa addr:$src))]>,
|
||||
EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
// Prefer non-temporal over temporal versions
// Every def inside this let block is given AddedComplexity = 400 and
// SchedRW = [WriteStore]. All three defs are store forms: MRMDestMem, an
// empty (outs) list, a 512-bit memory operand $dst and a VR512 register $src.
|
||||
let AddedComplexity = 400, SchedRW = [WriteStore] in {
|
||||
|
||||
// vmovntps, 512-bit: opcode 0x2B in the AVX512PSI class. The pattern matches
// an alignednontemporalstore of a v16f32 value; CD8 element size is 32.
def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
|
||||
(ins f512mem:$dst, VR512:$src),
|
||||
"vmovntps\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v16f32 VR512:$src),
|
||||
addr:$dst)],
|
||||
IIC_SSE_MOVNT>,
|
||||
EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
|
||||
// vmovntpd, 512-bit: same opcode 0x2B as VMOVNTPSZmr, but declared with the
// AVX512PDI class plus VEX_W and a CD8 element size of 64. The pattern
// matches an alignednontemporalstore of a v8f64 value.
def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
|
||||
(ins f512mem:$dst, VR512:$src),
|
||||
"vmovntpd\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v8f64 VR512:$src),
|
||||
addr:$dst)],
|
||||
IIC_SSE_MOVNT>,
|
||||
EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
|
||||
// vmovntdq, 512-bit: opcode 0xE7 in the AVX512BI class, with an i512mem
// destination instead of f512mem. The pattern matches an
// alignednontemporalstore of a v8i64 value; no VEX_W is applied here.
def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
|
||||
(ins i512mem:$dst, VR512:$src),
|
||||
"vmovntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v8i64 VR512:$src),
|
||||
addr:$dst)],
|
||||
IIC_SSE_MOVNT>,
|
||||
EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - Integer arithmetic
|
||||
//
|
||||
|
19
test/CodeGen/X86/avx512-nontemporal.ll
Normal file
19
test/CodeGen/X86/avx512-nontemporal.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; Code-generation test for 512-bit non-temporal stores: with +avx512f enabled,
; each 64-byte-aligned store tagged with !nontemporal below is expected to be
; emitted as vmovntps / vmovntdq / vmovntpd with a register operand whose name
; starts with "%z".
; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s
|
||||
|
||||
; All three stores write through the same i8* argument %B, bitcast to the
; vector pointer type needed for each store. Argument %D is not used.
define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) {
|
||||
; <16 x float> case: the stored value is the fadd of %A and %AA.
; CHECK: vmovntps %z
|
||||
%cast = bitcast i8* %B to <16 x float>*
|
||||
%A2 = fadd <16 x float> %A, %AA
|
||||
store <16 x float> %A2, <16 x float>* %cast, align 64, !nontemporal !0
|
||||
; <8 x i64> case: the stored value is the integer add of %E and %EE.
; CHECK: vmovntdq %z
|
||||
%cast1 = bitcast i8* %B to <8 x i64>*
|
||||
%E2 = add <8 x i64> %E, %EE
|
||||
store <8 x i64> %E2, <8 x i64>* %cast1, align 64, !nontemporal !0
|
||||
; <8 x double> case: the stored value is the fadd of %C and %CC.
; CHECK: vmovntpd %z
|
||||
%cast2 = bitcast i8* %B to <8 x double>*
|
||||
%C2 = fadd <8 x double> %C, %CC
|
||||
store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Metadata node referenced by the !nontemporal attachment on each store above.
!0 = metadata !{i32 1}
|
@ -3163,3 +3163,15 @@ vmovntdqa (%r14,%rdx,2), %zmm18
|
||||
// CHECK: vmovntdqa
|
||||
// CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x7c,0x14,0x02]
|
||||
vmovntdqa 128(%r12,%rdx), %zmm23
|
||||
|
||||
// Encoding checks for the 512-bit non-temporal stores (AT&T operand order:
// source register first, memory destination second). Each group gives the
// expected mnemonic and exact byte sequence, then the instruction to assemble.

// vmovntdq: zmm28 stored to base %rcx plus index %r13 scaled by 4, with no
// displacement.
// CHECK: vmovntdq
|
||||
// CHECK: encoding: [0x62,0x21,0x7d,0x48,0xe7,0x24,0xa9]
|
||||
vmovntdq %zmm28, (%rcx,%r13,4)
|
||||
|
||||
// vmovntpd: zmm6 stored to 4(%rdx). The expected bytes end in
// 0x04,0x00,0x00,0x00, i.e. the displacement 4 occupies four bytes.
// NOTE(review): presumably because 4 is not a multiple of the 64-byte
// compressed-displacement scale for this instruction - confirm.
// CHECK: vmovntpd
|
||||
// CHECK: encoding: [0x62,0xf1,0xfd,0x48,0x2b,0xb2,0x04,0x00,0x00,0x00]
|
||||
vmovntpd %zmm6, 4(%rdx)
|
||||
|
||||
// vmovntps: zmm11 stored to base %r13 plus index %rcx scaled by 4. The
// expected bytes end in a single 0x00 although no displacement is written.
// NOTE(review): presumably the zero displacement byte required when %r13 is
// the base register - confirm.
// CHECK: vmovntps
|
||||
// CHECK: encoding: [0x62,0x51,0x7c,0x48,0x2b,0x5c,0x8d,0x00]
|
||||
vmovntps %zmm11, (%r13,%rcx,4)
|
||||
|
Loading…
x
Reference in New Issue
Block a user