[AVX-512] Teach fastisel load/store handling to use EVEX encoded instructions for 128/256-bit vectors and scalar single/double.
Still need to fix the register classes to allow the extended range of registers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280682 91177308-0d34-0410-b5e6-96231b3b80d8
parent d844741822
commit 610e45c3d2
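
The change applies one pattern at every load and store case below: prefer the EVEX-encoded opcode when the subtarget has AVX-512 (VLX for the 128/256-bit vector cases, plain AVX-512 for the scalar single/double cases), otherwise fall back to the VEX (AVX) form, and finally to the legacy SSE form. A minimal standalone sketch of that cascade follows, assuming a hypothetical Opcode enum and helper rather than the real X86::* opcode constants used inside X86FastEmitLoad/X86FastEmitStore:

// Standalone sketch of the opcode-selection cascade used throughout the
// patch. The enum and helper are illustrative only; the real code assigns
// the generated X86::* instruction opcodes.
#include <cstdio>

enum Opcode { MOVAPSrm, VMOVAPSrm, VMOVAPSZ128rm };

// Prefer EVEX (needs AVX-512VL for 128/256-bit), then VEX (AVX), then SSE.
static Opcode pickAlignedLoad128(bool HasVLX, bool HasAVX) {
  return HasVLX ? VMOVAPSZ128rm
       : HasAVX ? VMOVAPSrm
                : MOVAPSrm;
}

int main() {
  // An AVX-512VL target (e.g. SKX) selects the EVEX form, a plain AVX
  // target keeps the VEX form, anything older keeps the SSE form.
  std::printf("%d %d %d\n", pickAlignedLoad128(true, true),
              pickAlignedLoad128(false, true),
              pickAlignedLoad128(false, false));
  return 0;
}

Selecting the EVEX forms is what will eventually let fast-isel address xmm16-xmm31 and ymm16-ymm31, which is why the commit message notes that the register classes still need to be extended.
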
@@ -351,6 +351,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
   bool HasSSE41 = Subtarget->hasSSE41();
   bool HasAVX = Subtarget->hasAVX();
   bool HasAVX2 = Subtarget->hasAVX2();
+  bool HasAVX512 = Subtarget->hasAVX512();
+  bool HasVLX = Subtarget->hasVLX();
   bool IsNonTemporal = MMO && MMO->isNonTemporal();

   // Get opcode and regclass of the output for the given load instruction.
@@ -378,7 +380,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     break;
   case MVT::f32:
     if (X86ScalarSSEf32) {
-      Opc = HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
+      Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
       RC = &X86::FR32RegClass;
     } else {
       Opc = X86::LD_Fp32m;
@@ -387,7 +389,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     break;
   case MVT::f64:
     if (X86ScalarSSEf64) {
-      Opc = HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
+      Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
       RC = &X86::FR64RegClass;
     } else {
       Opc = X86::LD_Fp64m;
@@ -399,20 +401,26 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     return false;
   case MVT::v4f32:
     if (IsNonTemporal && Alignment >= 16 && HasSSE41)
-      Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
+            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
     else if (Alignment >= 16)
-      Opc = HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
+      Opc = HasVLX ? X86::VMOVAPSZ128rm :
+            HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
     else
-      Opc = HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
+      Opc = HasVLX ? X86::VMOVUPSZ128rm :
+            HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
     RC = &X86::VR128RegClass;
     break;
   case MVT::v2f64:
     if (IsNonTemporal && Alignment >= 16 && HasSSE41)
-      Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
+            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
     else if (Alignment >= 16)
-      Opc = HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
+      Opc = HasVLX ? X86::VMOVAPDZ128rm :
+            HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
     else
-      Opc = HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
+      Opc = HasVLX ? X86::VMOVUPDZ128rm :
+            HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
     RC = &X86::VR128RegClass;
     break;
   case MVT::v4i32:
@@ -420,27 +428,34 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
   case MVT::v8i16:
   case MVT::v16i8:
     if (IsNonTemporal && Alignment >= 16)
-      Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+      Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
+            HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
     else if (Alignment >= 16)
-      Opc = HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
+      Opc = HasVLX ? X86::VMOVDQA64Z128rm :
+            HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
     else
-      Opc = HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
+      Opc = HasVLX ? X86::VMOVDQU64Z128rm :
+            HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
     RC = &X86::VR128RegClass;
     break;
   case MVT::v8f32:
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
-      Opc = X86::VMOVNTDQAYrm;
+      Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
+    else if (Alignment >= 32)
+      Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
     else
-      Opc = (Alignment >= 32) ? X86::VMOVAPSYrm : X86::VMOVUPSYrm;
+      Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
     RC = &X86::VR256RegClass;
     break;
   case MVT::v4f64:
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = X86::VMOVNTDQAYrm;
+    else if (Alignment >= 32)
+      Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
     else
-      Opc = (Alignment >= 32) ? X86::VMOVAPDYrm : X86::VMOVUPDYrm;
+      Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
     RC = &X86::VR256RegClass;
     break;
   case MVT::v8i32:
@@ -450,12 +465,14 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     assert(HasAVX);
     if (IsNonTemporal && Alignment >= 32 && HasAVX2)
       Opc = X86::VMOVNTDQAYrm;
+    else if (Alignment >= 32)
+      Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
     else
-      Opc = (Alignment >= 32) ? X86::VMOVDQAYrm : X86::VMOVDQUYrm;
+      Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
     RC = &X86::VR256RegClass;
     break;
   case MVT::v16f32:
-    assert(Subtarget->hasAVX512());
+    assert(HasAVX512);
     if (IsNonTemporal && Alignment >= 64)
       Opc = X86::VMOVNTDQAZrm;
     else
@@ -463,7 +480,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     RC = &X86::VR512RegClass;
     break;
   case MVT::v8f64:
-    assert(Subtarget->hasAVX512());
+    assert(HasAVX512);
     if (IsNonTemporal && Alignment >= 64)
       Opc = X86::VMOVNTDQAZrm;
     else
@@ -474,7 +491,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
   case MVT::v16i32:
   case MVT::v32i16:
   case MVT::v64i8:
-    assert(Subtarget->hasAVX512());
+    assert(HasAVX512);
     // Note: There are a lot more choices based on type with AVX-512, but
     // there's really no advantage when the load isn't masked.
     if (IsNonTemporal && Alignment >= 64)
@@ -504,6 +521,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
   bool HasSSE2 = Subtarget->hasSSE2();
   bool HasSSE4A = Subtarget->hasSSE4A();
   bool HasAVX = Subtarget->hasAVX();
+  bool HasAVX512 = Subtarget->hasAVX512();
+  bool HasVLX = Subtarget->hasVLX();
   bool IsNonTemporal = MMO && MMO->isNonTemporal();

   // Get opcode and regclass of the output for the given store instruction.
@@ -534,7 +553,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
       if (IsNonTemporal && HasSSE4A)
         Opc = X86::MOVNTSS;
       else
-        Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
+        Opc = HasAVX512 ? X86::VMOVSSZmr :
+              HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
     } else
       Opc = X86::ST_Fp32m;
     break;
@@ -543,27 +563,34 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
       if (IsNonTemporal && HasSSE4A)
         Opc = X86::MOVNTSD;
       else
-        Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
+        Opc = HasAVX512 ? X86::VMOVSDZmr :
+              HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
     } else
       Opc = X86::ST_Fp64m;
     break;
   case MVT::v4f32:
     if (Aligned) {
       if (IsNonTemporal)
-        Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
+        Opc = HasVLX ? X86::VMOVNTPSZ128mr :
+              HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
       else
-        Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
+        Opc = HasVLX ? X86::VMOVAPSZ128mr :
+              HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
     } else
-      Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
+      Opc = HasVLX ? X86::VMOVUPSZ128mr :
+            HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
     break;
   case MVT::v2f64:
     if (Aligned) {
       if (IsNonTemporal)
-        Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
+        Opc = HasVLX ? X86::VMOVNTPDZ128mr :
+              HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
       else
-        Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
+        Opc = HasVLX ? X86::VMOVAPDZ128mr :
+              HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
     } else
-      Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
+      Opc = HasVLX ? X86::VMOVUPDZ128mr :
+            HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
     break;
   case MVT::v4i32:
   case MVT::v2i64:
@@ -571,45 +598,57 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
   case MVT::v16i8:
     if (Aligned) {
       if (IsNonTemporal)
-        Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
+        Opc = HasVLX ? X86::VMOVNTDQZ128mr :
+              HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
       else
-        Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
+        Opc = HasVLX ? X86::VMOVDQA64Z128mr :
+              HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
     } else
-      Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
+      Opc = HasVLX ? X86::VMOVDQU64Z128mr :
+            HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
     break;
   case MVT::v8f32:
     assert(HasAVX);
-    if (Aligned)
-      Opc = IsNonTemporal ? X86::VMOVNTPSYmr : X86::VMOVAPSYmr;
-    else
-      Opc = X86::VMOVUPSYmr;
+    if (Aligned) {
+      if (IsNonTemporal)
+        Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
+      else
+        Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
+    } else
+      Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
     break;
   case MVT::v4f64:
     assert(HasAVX);
     if (Aligned) {
-      Opc = IsNonTemporal ? X86::VMOVNTPDYmr : X86::VMOVAPDYmr;
+      if (IsNonTemporal)
+        Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
+      else
+        Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
     } else
-      Opc = X86::VMOVUPDYmr;
+      Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
     break;
   case MVT::v8i32:
   case MVT::v4i64:
   case MVT::v16i16:
   case MVT::v32i8:
     assert(HasAVX);
-    if (Aligned)
-      Opc = IsNonTemporal ? X86::VMOVNTDQYmr : X86::VMOVDQAYmr;
-    else
-      Opc = X86::VMOVDQUYmr;
+    if (Aligned) {
+      if (IsNonTemporal)
+        Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
+      else
+        Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
+    } else
+      Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
     break;
   case MVT::v16f32:
-    assert(Subtarget->hasAVX512());
+    assert(HasAVX512);
     if (Aligned)
       Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
     else
       Opc = X86::VMOVUPSZmr;
     break;
   case MVT::v8f64:
-    assert(Subtarget->hasAVX512());
+    assert(HasAVX512);
     if (Aligned) {
       Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
     } else
@@ -619,7 +658,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
   case MVT::v16i32:
   case MVT::v32i16:
   case MVT::v64i8:
-    assert(Subtarget->hasAVX512());
+    assert(HasAVX512);
     // Note: There are a lot more choices based on type with AVX-512, but
     // there's really no advantage when the store isn't masked.
     if (Aligned)

@@ -58,11 +58,11 @@ define <4 x i32> @test_store_4xi32(<4 x i32>* nocapture %addr, <4 x i32> %value,
 ; SSE64-NEXT: movdqu %xmm0, (%eax)
 ; SSE64-NEXT: retl
 ;
-; AVX32-LABEL: test_store_4xi32:
-; AVX32: # BB#0:
-; AVX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX32-NEXT: vmovdqu %xmm0, (%rdi)
-; AVX32-NEXT: retq
+; AVXONLY32-LABEL: test_store_4xi32:
+; AVXONLY32: # BB#0:
+; AVXONLY32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVXONLY32-NEXT: vmovdqu %xmm0, (%rdi)
+; AVXONLY32-NEXT: retq
 ;
 ; AVX64-LABEL: test_store_4xi32:
 ; AVX64: # BB#0:
@@ -70,6 +70,18 @@ define <4 x i32> @test_store_4xi32(<4 x i32>* nocapture %addr, <4 x i32> %value,
 ; AVX64-NEXT: vpaddd %xmm1, %xmm0, %xmm0
 ; AVX64-NEXT: vmovdqu %xmm0, (%eax)
 ; AVX64-NEXT: retl
+;
+; KNL32-LABEL: test_store_4xi32:
+; KNL32: # BB#0:
+; KNL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; KNL32-NEXT: vmovdqu %xmm0, (%rdi)
+; KNL32-NEXT: retq
+;
+; SKX32-LABEL: test_store_4xi32:
+; SKX32: # BB#0:
+; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; SKX32-NEXT: vmovdqu64 %xmm0, (%rdi)
+; SKX32-NEXT: retq
 %foo = add <4 x i32> %value, %value2 ; to force integer type on store
 store <4 x i32> %foo, <4 x i32>* %addr, align 1
 ret <4 x i32> %foo
@@ -89,11 +101,11 @@ define <4 x i32> @test_store_4xi32_aligned(<4 x i32>* nocapture %addr, <4 x i32>
 ; SSE64-NEXT: movdqa %xmm0, (%eax)
 ; SSE64-NEXT: retl
 ;
-; AVX32-LABEL: test_store_4xi32_aligned:
-; AVX32: # BB#0:
-; AVX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX32-NEXT: vmovdqa %xmm0, (%rdi)
-; AVX32-NEXT: retq
+; AVXONLY32-LABEL: test_store_4xi32_aligned:
+; AVXONLY32: # BB#0:
+; AVXONLY32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVXONLY32-NEXT: vmovdqa %xmm0, (%rdi)
+; AVXONLY32-NEXT: retq
 ;
 ; AVX64-LABEL: test_store_4xi32_aligned:
 ; AVX64: # BB#0:
@@ -101,6 +113,18 @@ define <4 x i32> @test_store_4xi32_aligned(<4 x i32>* nocapture %addr, <4 x i32> %value,
 ; AVX64-NEXT: vpaddd %xmm1, %xmm0, %xmm0
 ; AVX64-NEXT: vmovdqa %xmm0, (%eax)
 ; AVX64-NEXT: retl
+;
+; KNL32-LABEL: test_store_4xi32_aligned:
+; KNL32: # BB#0:
+; KNL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; KNL32-NEXT: vmovdqa %xmm0, (%rdi)
+; KNL32-NEXT: retq
+;
+; SKX32-LABEL: test_store_4xi32_aligned:
+; SKX32: # BB#0:
+; SKX32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; SKX32-NEXT: vmovdqa64 %xmm0, (%rdi)
+; SKX32-NEXT: retq
 %foo = add <4 x i32> %value, %value2 ; to force integer type on store
 store <4 x i32> %foo, <4 x i32>* %addr, align 16
 ret <4 x i32> %foo

@@ -13,10 +13,20 @@ define <16 x i8> @test_v16i8(<16 x i8>* %V) {
 ; SSE-NEXT: movdqa (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v16i8:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v16i8:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v16i8:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v16i8:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <16 x i8>, <16 x i8>* %V, align 16
 ret <16 x i8> %0
@@ -28,10 +38,20 @@ define <8 x i16> @test_v8i16(<8 x i16>* %V) {
 ; SSE-NEXT: movdqa (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v8i16:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v8i16:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v8i16:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v8i16:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <8 x i16>, <8 x i16>* %V, align 16
 ret <8 x i16> %0
@@ -43,10 +63,20 @@ define <4 x i32> @test_v4i32(<4 x i32>* %V) {
 ; SSE-NEXT: movdqa (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v4i32:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v4i32:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v4i32:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v4i32:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <4 x i32>, <4 x i32>* %V, align 16
 ret <4 x i32> %0
@@ -58,10 +88,20 @@ define <2 x i64> @test_v2i64(<2 x i64>* %V) {
 ; SSE-NEXT: movdqa (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v2i64:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v2i64:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v2i64:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v2i64:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <2 x i64>, <2 x i64>* %V, align 16
 ret <2 x i64> %0
@@ -73,10 +113,20 @@ define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
 ; SSE-NEXT: movdqu (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v16i8_unaligned:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqu (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v16i8_unaligned:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqu (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v16i8_unaligned:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqu (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v16i8_unaligned:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqu64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <16 x i8>, <16 x i8>* %V, align 4
 ret <16 x i8> %0
@@ -88,10 +138,20 @@ define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
 ; SSE-NEXT: movdqu (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v8i16_unaligned:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqu (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v8i16_unaligned:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqu (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v8i16_unaligned:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqu (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v8i16_unaligned:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqu64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <8 x i16>, <8 x i16>* %V, align 4
 ret <8 x i16> %0
@@ -103,10 +163,20 @@ define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
 ; SSE-NEXT: movdqu (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v4i32_unaligned:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqu (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v4i32_unaligned:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqu (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v4i32_unaligned:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqu (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v4i32_unaligned:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqu64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <4 x i32>, <4 x i32>* %V, align 4
 ret <4 x i32> %0
@@ -118,10 +188,20 @@ define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
 ; SSE-NEXT: movdqu (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v2i64_unaligned:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqu (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v2i64_unaligned:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqu (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v2i64_unaligned:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqu (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v2i64_unaligned:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqu64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <2 x i64>, <2 x i64>* %V, align 4
 ret <2 x i64> %0
@@ -193,10 +273,20 @@ define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
 ; SSE-NEXT: movdqa (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v16i8_abi_alignment:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v16i8_abi_alignment:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v16i8_abi_alignment:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v16i8_abi_alignment:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <16 x i8>, <16 x i8>* %V
 ret <16 x i8> %0
@@ -208,10 +298,20 @@ define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
 ; SSE-NEXT: movdqa (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v8i16_abi_alignment:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v8i16_abi_alignment:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v8i16_abi_alignment:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v8i16_abi_alignment:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <8 x i16>, <8 x i16>* %V
 ret <8 x i16> %0
@@ -223,10 +323,20 @@ define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
 ; SSE-NEXT: movdqa (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v4i32_abi_alignment:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v4i32_abi_alignment:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v4i32_abi_alignment:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v4i32_abi_alignment:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <4 x i32>, <4 x i32>* %V
 ret <4 x i32> %0
@@ -238,10 +348,20 @@ define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
 ; SSE-NEXT: movdqa (%rdi), %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v2i64_abi_alignment:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v2i64_abi_alignment:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v2i64_abi_alignment:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v2i64_abi_alignment:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %xmm0
+; SKX-NEXT: retq
 entry:
 %0 = load <2 x i64>, <2 x i64>* %V
 ret <2 x i64> %0
@@ -284,10 +404,20 @@ define <32 x i8> @test_v32i8(<32 x i8>* %V) {
 ; SSE-NEXT: movaps 16(%rdi), %xmm1
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v32i8:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %ymm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v32i8:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %ymm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v32i8:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v32i8:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %ymm0
+; SKX-NEXT: retq
 entry:
 %0 = load <32 x i8>, <32 x i8>* %V, align 32
 ret <32 x i8> %0
@@ -300,10 +430,20 @@ define <16 x i16> @test_v16i16(<16 x i16>* %V) {
 ; SSE-NEXT: movaps 16(%rdi), %xmm1
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v16i16:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %ymm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v16i16:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %ymm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v16i16:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v16i16:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %ymm0
+; SKX-NEXT: retq
 entry:
 %0 = load <16 x i16>, <16 x i16>* %V, align 32
 ret <16 x i16> %0
@@ -316,10 +456,20 @@ define <8 x i32> @test_v8i32(<8 x i32>* %V) {
 ; SSE-NEXT: movaps 16(%rdi), %xmm1
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v8i32:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqu (%rdi), %ymm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v8i32:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqu (%rdi), %ymm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v8i32:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqu (%rdi), %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v8i32:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqu64 (%rdi), %ymm0
+; SKX-NEXT: retq
 entry:
 %0 = load <8 x i32>, <8 x i32>* %V, align 16
 ret <8 x i32> %0
@@ -332,10 +482,20 @@ define <4 x i64> @test_v4i64(<4 x i64>* %V) {
 ; SSE-NEXT: movaps 16(%rdi), %xmm1
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v4i64:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqa (%rdi), %ymm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v4i64:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqa (%rdi), %ymm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v4i64:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqa (%rdi), %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v4i64:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqa64 (%rdi), %ymm0
+; SKX-NEXT: retq
 entry:
 %0 = load <4 x i64>, <4 x i64>* %V, align 32
 ret <4 x i64> %0
@@ -348,10 +508,20 @@ define <32 x i8> @test_v32i8_unaligned(<32 x i8>* %V) {
 ; SSE-NEXT: movups 16(%rdi), %xmm1
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v32i8_unaligned:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqu (%rdi), %ymm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v32i8_unaligned:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqu (%rdi), %ymm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v32i8_unaligned:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqu (%rdi), %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v32i8_unaligned:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqu64 (%rdi), %ymm0
+; SKX-NEXT: retq
 entry:
 %0 = load <32 x i8>, <32 x i8>* %V, align 4
 ret <32 x i8> %0
@@ -364,10 +534,20 @@ define <16 x i16> @test_v16i16_unaligned(<16 x i16>* %V) {
 ; SSE-NEXT: movups 16(%rdi), %xmm1
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v16i16_unaligned:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqu (%rdi), %ymm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v16i16_unaligned:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqu (%rdi), %ymm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v16i16_unaligned:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqu (%rdi), %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v16i16_unaligned:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqu64 (%rdi), %ymm0
+; SKX-NEXT: retq
 entry:
 %0 = load <16 x i16>, <16 x i16>* %V, align 4
 ret <16 x i16> %0
@@ -380,10 +560,20 @@ define <8 x i32> @test_v8i32_unaligned(<8 x i32>* %V) {
 ; SSE-NEXT: movups 16(%rdi), %xmm1
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v8i32_unaligned:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqu (%rdi), %ymm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v8i32_unaligned:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqu (%rdi), %ymm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v8i32_unaligned:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqu (%rdi), %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v8i32_unaligned:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqu64 (%rdi), %ymm0
+; SKX-NEXT: retq
 entry:
 %0 = load <8 x i32>, <8 x i32>* %V, align 4
 ret <8 x i32> %0
@@ -396,10 +586,20 @@ define <4 x i64> @test_v4i64_unaligned(<4 x i64>* %V) {
 ; SSE-NEXT: movups 16(%rdi), %xmm1
 ; SSE-NEXT: retq
 ;
-; AVX-LABEL: test_v4i64_unaligned:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovdqu (%rdi), %ymm0
-; AVX-NEXT: retq
+; AVXONLY-LABEL: test_v4i64_unaligned:
+; AVXONLY: # BB#0: # %entry
+; AVXONLY-NEXT: vmovdqu (%rdi), %ymm0
+; AVXONLY-NEXT: retq
+;
+; KNL-LABEL: test_v4i64_unaligned:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vmovdqu (%rdi), %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_v4i64_unaligned:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vmovdqu64 (%rdi), %ymm0
+; SKX-NEXT: retq
 entry:
 %0 = load <4 x i64>, <4 x i64>* %V, align 4
 ret <4 x i64> %0