mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-24 19:44:49 +00:00
[SKX] Extended non-temporal load/store instructions for AVX512VL subsets.
Added the avx512_movnt_vl multiclass for handling the 256/128-bit forms of the instructions. Added encoding and lowering tests.
Reviewed by Elena Demikhovsky <elena.demikhovsky@intel.com>
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215536 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5d16d6c3f0
commit
232202439a
@ -1954,8 +1954,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
llvm_i32_ty], [IntrNoMem, Commutative]>;
|
||||
def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -3219,6 +3217,8 @@ let TargetPrefix = "x86" in {
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
|
||||
llvm_v8i64_ty, llvm_i8_ty],
|
||||
[IntrNoMem]>;
|
||||
def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">,
|
||||
Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -2090,43 +2090,73 @@ def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - Non-temporals
|
||||
//===----------------------------------------------------------------------===//
|
||||
let SchedRW = [WriteLoad] in {
|
||||
def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
|
||||
(ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
|
||||
[(set VR512:$dst, (int_x86_avx512_movntdqa addr:$src))],
|
||||
SSEPackedInt>, EVEX, T8PD, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
|
||||
def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst),
|
||||
(ins i512mem:$src),
|
||||
"vmovntdqa\t{$src, $dst|$dst, $src}",
|
||||
[(set VR512:$dst,
|
||||
(int_x86_avx512_movntdqa addr:$src))]>,
|
||||
EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
|
||||
let Predicates = [HasAVX512, HasVLX] in {
|
||||
def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
|
||||
(ins i256mem:$src),
|
||||
"vmovntdqa\t{$src, $dst|$dst, $src}", [],
|
||||
SSEPackedInt>, EVEX, T8PD, EVEX_V256,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
|
||||
// Prefer non-temporal over temporal versions
|
||||
let AddedComplexity = 400, SchedRW = [WriteStore] in {
|
||||
|
||||
def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
|
||||
(ins f512mem:$dst, VR512:$src),
|
||||
"vmovntps\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v16f32 VR512:$src),
|
||||
addr:$dst)],
|
||||
IIC_SSE_MOVNT>,
|
||||
EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
|
||||
|
||||
def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
|
||||
(ins f512mem:$dst, VR512:$src),
|
||||
"vmovntpd\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v8f64 VR512:$src),
|
||||
addr:$dst)],
|
||||
IIC_SSE_MOVNT>,
|
||||
EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
|
||||
def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
|
||||
(ins i512mem:$dst, VR512:$src),
|
||||
"vmovntdq\t{$src, $dst|$dst, $src}",
|
||||
[(alignednontemporalstore (v8i64 VR512:$src),
|
||||
addr:$dst)],
|
||||
IIC_SSE_MOVNT>,
|
||||
EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
|
||||
def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
|
||||
(ins i128mem:$src),
|
||||
"vmovntdqa\t{$src, $dst|$dst, $src}", [],
|
||||
SSEPackedInt>, EVEX, T8PD, EVEX_V128,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
}
|
||||
}
|
||||
|
||||
// avx512_movnt - Defines one register-to-memory store instruction, "mr", for
// a single vector width.
//   opc       - opcode byte passed to AVX512PI.
//   OpcodeStr - mnemonic; the AT&T/Intel operand string is appended to it.
//   st_frag   - store PatFrag used in the selection pattern.
//   OpVT      - value type of the stored register operand in the pattern.
//   RC        - register class of the source operand.
//   memop     - memory operand type of the destination.
//   d         - execution domain passed to AVX512PI.
//   itin      - itinerary class; defaults to IIC_SSE_MOVNT.
// The vector-length modifier (EVEX_V512/V256/V128) is not set here; only EVEX
// is applied, so the instantiating defm is expected to add the length.
multiclass avx512_movnt<bits<8> opc, string OpcodeStr, PatFrag st_frag,
|
||||
ValueType OpVT, RegisterClass RC, X86MemOperand memop,
|
||||
Domain d, InstrItinClass itin = IIC_SSE_MOVNT> {
|
||||
// AddedComplexity = 400 is the same value the other non-temporal store
// definitions in this change use under the comment "Prefer non-temporal
// versions".
let SchedRW = [WriteStore], mayStore = 1,
|
||||
AddedComplexity = 400 in
|
||||
// No outputs: the instruction only writes memop:$dst from RC:$src.
def mr : AVX512PI<opc, MRMDestMem, (outs), (ins memop:$dst, RC:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(st_frag (OpVT RC:$src), addr:$dst)], d, itin>, EVEX;
|
||||
}
|
||||
|
||||
// avx512_movnt_vl - Instantiates avx512_movnt three times, once per vector
// length, producing the Z (512-bit), Z256 and Z128 variants of one store.
//   elty   - element type letter, used both in the value type name and as the
//            prefix of the memory operand name (elty##"512mem", ...).
//   elsz   - element size in bits, as a string.
//   vsz512 / vsz256 / vsz128 - element count for each vector length, as a
//            string; the value type is looked up by name as
//            "v" ## count ## elty ## elsz (e.g. "v" "8" "i" "64" -> v8i64).
//   d      - execution domain forwarded to avx512_movnt.
//   prd    - base predicate; the 256/128-bit forms additionally need HasVLX.
//   itin   - itinerary class; defaults to IIC_SSE_MOVNT.
multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, PatFrag st_frag,
|
||||
string elty, string elsz, string vsz512,
|
||||
string vsz256, string vsz128, Domain d,
|
||||
Predicate prd, InstrItinClass itin = IIC_SSE_MOVNT> {
|
||||
// 512-bit form: guarded by the base predicate only.
let Predicates = [prd] in
|
||||
defm Z : avx512_movnt<opc, OpcodeStr, st_frag,
|
||||
!cast<ValueType>("v"##vsz512##elty##elsz), VR512,
|
||||
!cast<X86MemOperand>(elty##"512mem"), d, itin>,
|
||||
EVEX_V512;
|
||||
|
||||
// 256-bit and 128-bit forms: require HasVLX in addition to the base predicate.
let Predicates = [prd, HasVLX] in {
|
||||
defm Z256 : avx512_movnt<opc, OpcodeStr, st_frag,
|
||||
!cast<ValueType>("v"##vsz256##elty##elsz), VR256X,
|
||||
!cast<X86MemOperand>(elty##"256mem"), d, itin>,
|
||||
EVEX_V256;
|
||||
|
||||
defm Z128 : avx512_movnt<opc, OpcodeStr, st_frag,
|
||||
!cast<ValueType>("v"##vsz128##elty##elsz), VR128X,
|
||||
!cast<X86MemOperand>(elty##"128mem"), d, itin>,
|
||||
EVEX_V128;
|
||||
}
|
||||
}
|
||||
|
||||
// Non-temporal store instantiations. Each defm expands avx512_movnt_vl into
// Z / Z256 / Z128 "mr" records, all matched with alignednontemporalstore.
// The string arguments select the value types and memory operands by name.

// vmovntdq, opcode 0xE7: v8i64 / v4i64 / v2i64 with i512mem / i256mem / i128mem.
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", alignednontemporalstore,
|
||||
"i", "64", "8", "4", "2", SSEPackedInt,
|
||||
HasAVX512>, PD, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
// vmovntpd, opcode 0x2B: v8f64 / v4f64 / v2f64 with f512mem / f256mem / f128mem.
// Differs from vmovntps below by the PD prefix, VEX_W and 64-bit CD8 element.
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", alignednontemporalstore,
|
||||
"f", "64", "8", "4", "2", SSEPackedDouble,
|
||||
HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
// vmovntps, opcode 0x2B: v16f32 / v8f32 / v4f32 with f512mem / f256mem / f128mem.
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", alignednontemporalstore,
|
||||
"f", "32", "16", "8", "4", SSEPackedSingle,
|
||||
HasAVX512>, PS, EVEX_CD8<32, CD8VF>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 - Integer arithmetic
|
||||
//
|
||||
|
@ -727,6 +727,7 @@ def HasDQI : Predicate<"Subtarget->hasDQI()">;
|
||||
def HasBWI : Predicate<"Subtarget->hasBWI()">;
|
||||
def HasVLX : Predicate<"Subtarget->hasVLX()">,
|
||||
AssemblerPredicate<"FeatureVLX", "AVX-512 VLX ISA">;
|
||||
def NoVLX : Predicate<"!Subtarget->hasVLX()">;
|
||||
|
||||
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
|
||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||
|
@ -3697,6 +3697,7 @@ let Predicates = [UseSSE1] in {
|
||||
|
||||
let AddedComplexity = 400 in { // Prefer non-temporal versions
|
||||
let SchedRW = [WriteStore] in {
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
|
||||
(ins f128mem:$dst, VR128:$src),
|
||||
"movntps\t{$src, $dst|$dst, $src}",
|
||||
@ -3737,6 +3738,7 @@ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
|
||||
[(alignednontemporalstore (v4i64 VR256:$src),
|
||||
addr:$dst)],
|
||||
IIC_SSE_MOVNT>, VEX, VEX_L;
|
||||
}
|
||||
|
||||
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||
"movntps\t{$src, $dst|$dst, $src}",
|
||||
|
34
test/CodeGen/X86/avx512vl-nontemporal.ll
Normal file
34
test/CodeGen/X86/avx512vl-nontemporal.ll
Normal file
@ -0,0 +1,34 @@
|
||||
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s
|
||||
|
||||
; f256: 256-bit non-temporal store lowering. Performs three stores through the
; same i8* %B, each tagged with !nontemporal !0 and "align 64": <8 x float>,
; <4 x i64> and <4 x double>. Every stored value is the result of an fadd/add
; of two arguments rather than an argument itself.
; The checks expect vmovntps / vmovntdq / vmovntpd on a ymm register and pin
; only the first encoding byte, 0x62 (the leading byte of an EVEX prefix).
; The i32 %D parameter is not used in the body.
define void @f256(<8 x float> %A, <8 x float> %AA, i8* %B, <4 x double> %C, <4 x double> %CC, i32 %D, <4 x i64> %E, <4 x i64> %EE) {
|
||||
; CHECK: vmovntps %ymm{{.*}} ## encoding: [0x62
|
||||
%cast = bitcast i8* %B to <8 x float>*
|
||||
%A2 = fadd <8 x float> %A, %AA
|
||||
store <8 x float> %A2, <8 x float>* %cast, align 64, !nontemporal !0
|
||||
; CHECK: vmovntdq %ymm{{.*}} ## encoding: [0x62
|
||||
%cast1 = bitcast i8* %B to <4 x i64>*
|
||||
%E2 = add <4 x i64> %E, %EE
|
||||
store <4 x i64> %E2, <4 x i64>* %cast1, align 64, !nontemporal !0
|
||||
; CHECK: vmovntpd %ymm{{.*}} ## encoding: [0x62
|
||||
%cast2 = bitcast i8* %B to <4 x double>*
|
||||
%C2 = fadd <4 x double> %C, %CC
|
||||
store <4 x double> %C2, <4 x double>* %cast2, align 64, !nontemporal !0
|
||||
ret void
|
||||
}
|
||||
|
||||
; f128: 128-bit non-temporal store lowering. Performs three stores through the
; same i8* %B, each tagged with !nontemporal !0 and "align 64": <4 x float>,
; <2 x i64> and <2 x double>. Every stored value is the result of an fadd/add
; of two arguments rather than an argument itself.
; The checks expect vmovntps / vmovntdq / vmovntpd on an xmm register and pin
; only the first encoding byte, 0x62 (the leading byte of an EVEX prefix).
; The i32 %D parameter is not used in the body.
define void @f128(<4 x float> %A, <4 x float> %AA, i8* %B, <2 x double> %C, <2 x double> %CC, i32 %D, <2 x i64> %E, <2 x i64> %EE) {
|
||||
; CHECK: vmovntps %xmm{{.*}} ## encoding: [0x62
|
||||
%cast = bitcast i8* %B to <4 x float>*
|
||||
%A2 = fadd <4 x float> %A, %AA
|
||||
store <4 x float> %A2, <4 x float>* %cast, align 64, !nontemporal !0
|
||||
; CHECK: vmovntdq %xmm{{.*}} ## encoding: [0x62
|
||||
%cast1 = bitcast i8* %B to <2 x i64>*
|
||||
%E2 = add <2 x i64> %E, %EE
|
||||
store <2 x i64> %E2, <2 x i64>* %cast1, align 64, !nontemporal !0
|
||||
; CHECK: vmovntpd %xmm{{.*}} ## encoding: [0x62
|
||||
%cast2 = bitcast i8* %B to <2 x double>*
|
||||
%C2 = fadd <2 x double> %C, %CC
|
||||
store <2 x double> %C2, <2 x double>* %cast2, align 64, !nontemporal !0
|
||||
ret void
|
||||
}
|
||||
!0 = metadata !{i32 1}
|
@ -665,6 +665,102 @@
|
||||
// CHECK: encoding: [0x62,0xf1,0xfe,0x48,0x6f,0xb2,0xc0,0xdf,0xff,0xff]
|
||||
vmovdqu64 -8256(%rdx), %zmm6
|
||||
|
||||
// CHECK: vmovntdq %zmm24, (%rcx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x01]
|
||||
vmovntdq %zmm24, (%rcx)
|
||||
|
||||
// CHECK: vmovntdq %zmm24, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0x21,0x7d,0x48,0xe7,0x84,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntdq %zmm24, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovntdq %zmm24, 8128(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x42,0x7f]
|
||||
vmovntdq %zmm24, 8128(%rdx)
|
||||
|
||||
// CHECK: vmovntdq %zmm24, 8192(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x82,0x00,0x20,0x00,0x00]
|
||||
vmovntdq %zmm24, 8192(%rdx)
|
||||
|
||||
// CHECK: vmovntdq %zmm24, -8192(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x42,0x80]
|
||||
vmovntdq %zmm24, -8192(%rdx)
|
||||
|
||||
// CHECK: vmovntdq %zmm24, -8256(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7d,0x48,0xe7,0x82,0xc0,0xdf,0xff,0xff]
|
||||
vmovntdq %zmm24, -8256(%rdx)
|
||||
|
||||
// CHECK: vmovntdqa (%rcx), %zmm17
|
||||
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x09]
|
||||
vmovntdqa (%rcx), %zmm17
|
||||
|
||||
// CHECK: vmovntdqa 291(%rax,%r14,8), %zmm17
|
||||
// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x2a,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntdqa 291(%rax,%r14,8), %zmm17
|
||||
|
||||
// CHECK: vmovntdqa 8128(%rdx), %zmm17
|
||||
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x4a,0x7f]
|
||||
vmovntdqa 8128(%rdx), %zmm17
|
||||
|
||||
// CHECK: vmovntdqa 8192(%rdx), %zmm17
|
||||
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x8a,0x00,0x20,0x00,0x00]
|
||||
vmovntdqa 8192(%rdx), %zmm17
|
||||
|
||||
// CHECK: vmovntdqa -8192(%rdx), %zmm17
|
||||
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x4a,0x80]
|
||||
vmovntdqa -8192(%rdx), %zmm17
|
||||
|
||||
// CHECK: vmovntdqa -8256(%rdx), %zmm17
|
||||
// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x8a,0xc0,0xdf,0xff,0xff]
|
||||
vmovntdqa -8256(%rdx), %zmm17
|
||||
|
||||
// CHECK: vmovntpd %zmm17, (%rcx)
|
||||
// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x09]
|
||||
vmovntpd %zmm17, (%rcx)
|
||||
|
||||
// CHECK: vmovntpd %zmm17, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0xa1,0xfd,0x48,0x2b,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntpd %zmm17, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovntpd %zmm17, 8128(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x4a,0x7f]
|
||||
vmovntpd %zmm17, 8128(%rdx)
|
||||
|
||||
// CHECK: vmovntpd %zmm17, 8192(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x8a,0x00,0x20,0x00,0x00]
|
||||
vmovntpd %zmm17, 8192(%rdx)
|
||||
|
||||
// CHECK: vmovntpd %zmm17, -8192(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x4a,0x80]
|
||||
vmovntpd %zmm17, -8192(%rdx)
|
||||
|
||||
// CHECK: vmovntpd %zmm17, -8256(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x8a,0xc0,0xdf,0xff,0xff]
|
||||
vmovntpd %zmm17, -8256(%rdx)
|
||||
|
||||
// CHECK: vmovntps %zmm5, (%rcx)
|
||||
// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x29]
|
||||
vmovntps %zmm5, (%rcx)
|
||||
|
||||
// CHECK: vmovntps %zmm5, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0xb1,0x7c,0x48,0x2b,0xac,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntps %zmm5, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovntps %zmm5, 8128(%rdx)
|
||||
// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x6a,0x7f]
|
||||
vmovntps %zmm5, 8128(%rdx)
|
||||
|
||||
// CHECK: vmovntps %zmm5, 8192(%rdx)
|
||||
// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0xaa,0x00,0x20,0x00,0x00]
|
||||
vmovntps %zmm5, 8192(%rdx)
|
||||
|
||||
// CHECK: vmovntps %zmm5, -8192(%rdx)
|
||||
// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x6a,0x80]
|
||||
vmovntps %zmm5, -8192(%rdx)
|
||||
|
||||
// CHECK: vmovntps %zmm5, -8256(%rdx)
|
||||
// CHECK: encoding: [0x62,0xf1,0x7c,0x48,0x2b,0xaa,0xc0,0xdf,0xff,0xff]
|
||||
vmovntps %zmm5, -8256(%rdx)
|
||||
|
||||
// CHECK: vmovupd %zmm9, %zmm27
|
||||
// CHECK: encoding: [0x62,0x41,0xfd,0x48,0x10,0xd9]
|
||||
vmovupd %zmm9, %zmm27
|
||||
|
@ -432,6 +432,198 @@
|
||||
// CHECK: encoding: [0x62,0x61,0xfe,0x28,0x6f,0xaa,0xe0,0xef,0xff,0xff]
|
||||
vmovdqu64 -4128(%rdx), %ymm29
|
||||
|
||||
// CHECK: vmovntdq %xmm22, (%rcx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x31]
|
||||
vmovntdq %xmm22, (%rcx)
|
||||
|
||||
// CHECK: vmovntdq %xmm22, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xe7,0xb4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntdq %xmm22, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovntdq %xmm22, 2032(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x72,0x7f]
|
||||
vmovntdq %xmm22, 2032(%rdx)
|
||||
|
||||
// CHECK: vmovntdq %xmm22, 2048(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0xb2,0x00,0x08,0x00,0x00]
|
||||
vmovntdq %xmm22, 2048(%rdx)
|
||||
|
||||
// CHECK: vmovntdq %xmm22, -2048(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x72,0x80]
|
||||
vmovntdq %xmm22, -2048(%rdx)
|
||||
|
||||
// CHECK: vmovntdq %xmm22, -2064(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0xe7,0xb2,0xf0,0xf7,0xff,0xff]
|
||||
vmovntdq %xmm22, -2064(%rdx)
|
||||
|
||||
// CHECK: vmovntdq %ymm19, (%rcx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x19]
|
||||
vmovntdq %ymm19, (%rcx)
|
||||
|
||||
// CHECK: vmovntdq %ymm19, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0xa1,0x7d,0x28,0xe7,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntdq %ymm19, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovntdq %ymm19, 4064(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x5a,0x7f]
|
||||
vmovntdq %ymm19, 4064(%rdx)
|
||||
|
||||
// CHECK: vmovntdq %ymm19, 4096(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x9a,0x00,0x10,0x00,0x00]
|
||||
vmovntdq %ymm19, 4096(%rdx)
|
||||
|
||||
// CHECK: vmovntdq %ymm19, -4096(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x5a,0x80]
|
||||
vmovntdq %ymm19, -4096(%rdx)
|
||||
|
||||
// CHECK: vmovntdq %ymm19, -4128(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x9a,0xe0,0xef,0xff,0xff]
|
||||
vmovntdq %ymm19, -4128(%rdx)
|
||||
|
||||
// CHECK: vmovntdqa (%rcx), %xmm24
|
||||
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x01]
|
||||
vmovntdqa (%rcx), %xmm24
|
||||
|
||||
// CHECK: vmovntdqa 291(%rax,%r14,8), %xmm24
|
||||
// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x2a,0x84,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntdqa 291(%rax,%r14,8), %xmm24
|
||||
|
||||
// CHECK: vmovntdqa 2032(%rdx), %xmm24
|
||||
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x42,0x7f]
|
||||
vmovntdqa 2032(%rdx), %xmm24
|
||||
|
||||
// CHECK: vmovntdqa 2048(%rdx), %xmm24
|
||||
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x82,0x00,0x08,0x00,0x00]
|
||||
vmovntdqa 2048(%rdx), %xmm24
|
||||
|
||||
// CHECK: vmovntdqa -2048(%rdx), %xmm24
|
||||
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x42,0x80]
|
||||
vmovntdqa -2048(%rdx), %xmm24
|
||||
|
||||
// CHECK: vmovntdqa -2064(%rdx), %xmm24
|
||||
// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x2a,0x82,0xf0,0xf7,0xff,0xff]
|
||||
vmovntdqa -2064(%rdx), %xmm24
|
||||
|
||||
// CHECK: vmovntdqa (%rcx), %ymm28
|
||||
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0x21]
|
||||
vmovntdqa (%rcx), %ymm28
|
||||
|
||||
// CHECK: vmovntdqa 291(%rax,%r14,8), %ymm28
|
||||
// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x2a,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntdqa 291(%rax,%r14,8), %ymm28
|
||||
|
||||
// CHECK: vmovntdqa 4064(%rdx), %ymm28
|
||||
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0x62,0x7f]
|
||||
vmovntdqa 4064(%rdx), %ymm28
|
||||
|
||||
// CHECK: vmovntdqa 4096(%rdx), %ymm28
|
||||
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0xa2,0x00,0x10,0x00,0x00]
|
||||
vmovntdqa 4096(%rdx), %ymm28
|
||||
|
||||
// CHECK: vmovntdqa -4096(%rdx), %ymm28
|
||||
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0x62,0x80]
|
||||
vmovntdqa -4096(%rdx), %ymm28
|
||||
|
||||
// CHECK: vmovntdqa -4128(%rdx), %ymm28
|
||||
// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x2a,0xa2,0xe0,0xef,0xff,0xff]
|
||||
vmovntdqa -4128(%rdx), %ymm28
|
||||
|
||||
// CHECK: vmovntpd %xmm17, (%rcx)
|
||||
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x09]
|
||||
vmovntpd %xmm17, (%rcx)
|
||||
|
||||
// CHECK: vmovntpd %xmm17, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0xa1,0xfd,0x08,0x2b,0x8c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntpd %xmm17, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovntpd %xmm17, 2032(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x4a,0x7f]
|
||||
vmovntpd %xmm17, 2032(%rdx)
|
||||
|
||||
// CHECK: vmovntpd %xmm17, 2048(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x8a,0x00,0x08,0x00,0x00]
|
||||
vmovntpd %xmm17, 2048(%rdx)
|
||||
|
||||
// CHECK: vmovntpd %xmm17, -2048(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x4a,0x80]
|
||||
vmovntpd %xmm17, -2048(%rdx)
|
||||
|
||||
// CHECK: vmovntpd %xmm17, -2064(%rdx)
|
||||
// CHECK: encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x8a,0xf0,0xf7,0xff,0xff]
|
||||
vmovntpd %xmm17, -2064(%rdx)
|
||||
|
||||
// CHECK: vmovntpd %ymm27, (%rcx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x19]
|
||||
vmovntpd %ymm27, (%rcx)
|
||||
|
||||
// CHECK: vmovntpd %ymm27, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0x21,0xfd,0x28,0x2b,0x9c,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntpd %ymm27, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovntpd %ymm27, 4064(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x5a,0x7f]
|
||||
vmovntpd %ymm27, 4064(%rdx)
|
||||
|
||||
// CHECK: vmovntpd %ymm27, 4096(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x9a,0x00,0x10,0x00,0x00]
|
||||
vmovntpd %ymm27, 4096(%rdx)
|
||||
|
||||
// CHECK: vmovntpd %ymm27, -4096(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x5a,0x80]
|
||||
vmovntpd %ymm27, -4096(%rdx)
|
||||
|
||||
// CHECK: vmovntpd %ymm27, -4128(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0xfd,0x28,0x2b,0x9a,0xe0,0xef,0xff,0xff]
|
||||
vmovntpd %ymm27, -4128(%rdx)
|
||||
|
||||
// CHECK: vmovntps %xmm26, (%rcx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x11]
|
||||
vmovntps %xmm26, (%rcx)
|
||||
|
||||
// CHECK: vmovntps %xmm26, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0x21,0x7c,0x08,0x2b,0x94,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntps %xmm26, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovntps %xmm26, 2032(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x52,0x7f]
|
||||
vmovntps %xmm26, 2032(%rdx)
|
||||
|
||||
// CHECK: vmovntps %xmm26, 2048(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x92,0x00,0x08,0x00,0x00]
|
||||
vmovntps %xmm26, 2048(%rdx)
|
||||
|
||||
// CHECK: vmovntps %xmm26, -2048(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x52,0x80]
|
||||
vmovntps %xmm26, -2048(%rdx)
|
||||
|
||||
// CHECK: vmovntps %xmm26, -2064(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x08,0x2b,0x92,0xf0,0xf7,0xff,0xff]
|
||||
vmovntps %xmm26, -2064(%rdx)
|
||||
|
||||
// CHECK: vmovntps %ymm28, (%rcx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0x21]
|
||||
vmovntps %ymm28, (%rcx)
|
||||
|
||||
// CHECK: vmovntps %ymm28, 291(%rax,%r14,8)
|
||||
// CHECK: encoding: [0x62,0x21,0x7c,0x28,0x2b,0xa4,0xf0,0x23,0x01,0x00,0x00]
|
||||
vmovntps %ymm28, 291(%rax,%r14,8)
|
||||
|
||||
// CHECK: vmovntps %ymm28, 4064(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0x62,0x7f]
|
||||
vmovntps %ymm28, 4064(%rdx)
|
||||
|
||||
// CHECK: vmovntps %ymm28, 4096(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0xa2,0x00,0x10,0x00,0x00]
|
||||
vmovntps %ymm28, 4096(%rdx)
|
||||
|
||||
// CHECK: vmovntps %ymm28, -4096(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0x62,0x80]
|
||||
vmovntps %ymm28, -4096(%rdx)
|
||||
|
||||
// CHECK: vmovntps %ymm28, -4128(%rdx)
|
||||
// CHECK: encoding: [0x62,0x61,0x7c,0x28,0x2b,0xa2,0xe0,0xef,0xff,0xff]
|
||||
vmovntps %ymm28, -4128(%rdx)
|
||||
|
||||
// CHECK: vmovupd %xmm22, %xmm24
|
||||
// CHECK: encoding: [0x62,0x21,0xfd,0x08,0x10,0xc6]
|
||||
vmovupd %xmm22, %xmm24
|
||||
|
Loading…
x
Reference in New Issue
Block a user