[X86] AVX512: Add non-temporal stores

Note that I followed the AVX2 convention here and didn't add LLVM intrinsics
for stores.  Instead, the instructions can be generated with the !nontemporal
metadata hint on LLVM IR stores (see new test). The GCC builtins are lowered
directly into nontemporal stores.

<rdar://problem/17082571>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211176 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Adam Nemet 2014-06-18 16:51:10 +00:00
parent 7fc69597b7
commit f1b790f791
3 changed files with 60 additions and 0 deletions

View File

@ -1800,6 +1800,35 @@ def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst),
(int_x86_avx512_movntdqa addr:$src))]>,
EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
// 512-bit non-temporal vector stores (ZMM register -> memory).
//
// Prefer non-temporal over temporal versions: AddedComplexity = 400 is applied
// to every pattern in this block so that a store carrying the non-temporal
// hint is matched by these definitions rather than by an ordinary store
// pattern. SchedRW = [WriteStore] gives all three definitions the store
// scheduling read/write list.
//
// Every definition below has the same shape:
//   - MRMDestMem form with an empty (outs) list: the only effect is the store.
//   - $dst is the 512-bit memory operand, $src is a VR512 register.
//   - The selection pattern is alignednontemporalstore on one 512-bit type.
//   - EVEX, EVEX_V512 select the EVEX encoding with 512-bit vector length.
//   - EVEX_CD8<N, CD8VF> supplies the compressed-displacement parameters
//     (N-bit elements, full-vector tuple).
let AddedComplexity = 400, SchedRW = [WriteStore] in {
// vmovntps: opcode 0x2B, stores sixteen 32-bit floats (v16f32).
def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
(ins f512mem:$dst, VR512:$src),
"vmovntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v16f32 VR512:$src),
addr:$dst)],
IIC_SSE_MOVNT>,
EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
// vmovntpd: same opcode 0x2B, stores eight 64-bit doubles (v8f64). Uses the
// PD instruction class and additionally sets VEX_W.
def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
(ins f512mem:$dst, VR512:$src),
"vmovntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v8f64 VR512:$src),
addr:$dst)],
IIC_SSE_MOVNT>,
EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// vmovntdq: opcode 0xE7, integer form; the pattern is written for v8i64 and
// the memory operand is i512mem instead of f512mem. VEX_W is not set here.
def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
(ins i512mem:$dst, VR512:$src),
"vmovntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v8i64 VR512:$src),
addr:$dst)],
IIC_SSE_MOVNT>,
EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//

View File

@ -0,0 +1,19 @@
; RUN: llc < %s -march=x86-64 -mattr=+avx512f | FileCheck %s
;
; Codegen test: with AVX-512F enabled, a 64-byte-aligned store of a 512-bit
; vector that carries !nontemporal metadata is expected to be emitted as the
; matching non-temporal store instruction (vmovntps / vmovntdq / vmovntpd).
; The expected-output patterns end in "%z", i.e. they only require that the
; first operand printed after the mnemonic starts like a zmm register name.
;
; Each vector type is passed twice (%A/%AA, %C/%CC, %E/%EE) so the stored value
; is the result of an add/fadd rather than a bare argument.
; NOTE(review): presumably this keeps the stored value in a register produced
; inside the function; the i32 %D argument is not used by the body.
define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x double> %CC, i32 %D, <8 x i64> %E, <8 x i64> %EE) {
; Case 1: <16 x float> -> vmovntps.
; CHECK: vmovntps %z
%cast = bitcast i8* %B to <16 x float>*
%A2 = fadd <16 x float> %A, %AA
; align 64 is the full size of a 512-bit vector; !nontemporal !0 is the hint.
store <16 x float> %A2, <16 x float>* %cast, align 64, !nontemporal !0
; Case 2: <8 x i64> -> vmovntdq.
; CHECK: vmovntdq %z
%cast1 = bitcast i8* %B to <8 x i64>*
%E2 = add <8 x i64> %E, %EE
store <8 x i64> %E2, <8 x i64>* %cast1, align 64, !nontemporal !0
; Case 3: <8 x double> -> vmovntpd.
; CHECK: vmovntpd %z
%cast2 = bitcast i8* %B to <8 x double>*
%C2 = fadd <8 x double> %C, %CC
store <8 x double> %C2, <8 x double>* %cast2, align 64, !nontemporal !0
ret void
}
; Metadata node referenced by every !nontemporal attachment above.
!0 = metadata !{i32 1}

View File

@ -3163,3 +3163,15 @@ vmovntdqa (%r14,%rdx,2), %zmm18
// Each test case below is a group of three lines: the expected mnemonic, the
// expected encoding bytes, and the assembly input that must produce them.
// All encodings start with 0x62, the first byte of the EVEX prefix.

// Non-temporal load: memory source, zmm destination (opcode byte 0x2a).
// CHECK: vmovntdqa
// CHECK: encoding: [0x62,0xc2,0x7d,0x48,0x2a,0x7c,0x14,0x02]
vmovntdqa 128(%r12,%rdx), %zmm23

// Non-temporal integer store: zmm source, memory destination (opcode byte
// 0xe7), base + scaled-index addressing with no displacement bytes.
// CHECK: vmovntdq
// CHECK: encoding: [0x62,0x21,0x7d,0x48,0xe7,0x24,0xa9]
vmovntdq %zmm28, (%rcx,%r13,4)

// Non-temporal double-precision store (opcode byte 0x2b). The displacement 4
// is emitted as four bytes (0x04,0x00,0x00,0x00).
// NOTE(review): presumably because 4 is not a multiple of the 64-byte
// compressed-disp8 scale, so the one-byte form cannot be used - confirm.
// CHECK: vmovntpd
// CHECK: encoding: [0x62,0xf1,0xfd,0x48,0x2b,0xb2,0x04,0x00,0x00,0x00]
vmovntpd %zmm6, 4(%rdx)

// Non-temporal single-precision store (opcode byte 0x2b) with %r13 as the
// base register; the encoding ends in an explicit zero displacement byte.
// CHECK: vmovntps
// CHECK: encoding: [0x62,0x51,0x7c,0x48,0x2b,0x5c,0x8d,0x00]
vmovntps %zmm11, (%r13,%rcx,4)