mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-15 00:16:42 +00:00
[X86] Add AVX 256-bit loads and stores to fast isel.
I'm not sure why this was missing for so long. This also exposed that normal isel was picking the floating-point 256-bit VMOVNTPS for some integer types on AVX1, even though VMOVNTDQ is available. In practice it doesn't matter due to the execution dependency fix pass, but it required extra isel patterns. That will be fixed in a follow-up commit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271481 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
581e354beb
commit
f543143a73
@ -348,6 +348,7 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
|
||||
bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
|
||||
MachineMemOperand *MMO, unsigned &ResultReg,
|
||||
unsigned Alignment) {
|
||||
bool HasAVX = Subtarget->hasAVX();
|
||||
// Get opcode and regclass of the output for the given load instruction.
|
||||
unsigned Opc = 0;
|
||||
const TargetRegisterClass *RC = nullptr;
|
||||
@ -373,7 +374,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
|
||||
break;
|
||||
case MVT::f32:
|
||||
if (X86ScalarSSEf32) {
|
||||
Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
|
||||
Opc = HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
|
||||
RC = &X86::FR32RegClass;
|
||||
} else {
|
||||
Opc = X86::LD_Fp32m;
|
||||
@ -382,7 +383,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
|
||||
break;
|
||||
case MVT::f64:
|
||||
if (X86ScalarSSEf64) {
|
||||
Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
|
||||
Opc = HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
|
||||
RC = &X86::FR64RegClass;
|
||||
} else {
|
||||
Opc = X86::LD_Fp64m;
|
||||
@ -394,16 +395,16 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
|
||||
return false;
|
||||
case MVT::v4f32:
|
||||
if (Alignment >= 16)
|
||||
Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
|
||||
Opc = HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
|
||||
else
|
||||
Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
|
||||
Opc = HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
|
||||
RC = &X86::VR128RegClass;
|
||||
break;
|
||||
case MVT::v2f64:
|
||||
if (Alignment >= 16)
|
||||
Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
|
||||
Opc = HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
|
||||
else
|
||||
Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
|
||||
Opc = HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
|
||||
RC = &X86::VR128RegClass;
|
||||
break;
|
||||
case MVT::v4i32:
|
||||
@ -411,11 +412,29 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
|
||||
case MVT::v8i16:
|
||||
case MVT::v16i8:
|
||||
if (Alignment >= 16)
|
||||
Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
|
||||
Opc = HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
|
||||
else
|
||||
Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
|
||||
Opc = HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
|
||||
RC = &X86::VR128RegClass;
|
||||
break;
|
||||
case MVT::v8f32:
|
||||
assert(HasAVX);
|
||||
Opc = (Alignment >= 32) ? X86::VMOVAPSYrm : X86::VMOVUPSYrm;
|
||||
RC = &X86::VR256RegClass;
|
||||
break;
|
||||
case MVT::v4f64:
|
||||
assert(HasAVX);
|
||||
Opc = (Alignment >= 32) ? X86::VMOVAPDYrm : X86::VMOVUPDYrm;
|
||||
RC = &X86::VR256RegClass;
|
||||
break;
|
||||
case MVT::v8i32:
|
||||
case MVT::v4i64:
|
||||
case MVT::v16i16:
|
||||
case MVT::v32i8:
|
||||
assert(HasAVX);
|
||||
Opc = (Alignment >= 32) ? X86::VMOVDQAYrm : X86::VMOVDQUYrm;
|
||||
RC = &X86::VR256RegClass;
|
||||
break;
|
||||
}
|
||||
|
||||
ResultReg = createResultReg(RC);
|
||||
@ -508,7 +527,31 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
|
||||
else
|
||||
Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
|
||||
} else
|
||||
Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
|
||||
Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
|
||||
break;
|
||||
case MVT::v8f32:
|
||||
assert(HasAVX);
|
||||
if (Aligned)
|
||||
Opc = IsNonTemporal ? X86::VMOVNTPSYmr : X86::VMOVAPSYmr;
|
||||
else
|
||||
Opc = X86::VMOVUPSYmr;
|
||||
break;
|
||||
case MVT::v4f64:
|
||||
assert(HasAVX);
|
||||
if (Aligned) {
|
||||
Opc = IsNonTemporal ? X86::VMOVNTPDYmr : X86::VMOVAPDYmr;
|
||||
} else
|
||||
Opc = X86::VMOVUPDYmr;
|
||||
break;
|
||||
case MVT::v8i32:
|
||||
case MVT::v4i64:
|
||||
case MVT::v16i16:
|
||||
case MVT::v32i8:
|
||||
assert(HasAVX);
|
||||
if (Aligned)
|
||||
Opc = IsNonTemporal ? X86::VMOVNTDQYmr : X86::VMOVDQAYmr;
|
||||
else
|
||||
Opc = X86::VMOVDQUYmr;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -260,7 +260,7 @@ define void @test_nt32xi8(<32 x i8>* nocapture %ptr, <32 x i8> %X) {
|
||||
;
|
||||
; AVX-LABEL: test_nt32xi8:
|
||||
; AVX: # BB#0: # %entry
|
||||
; AVX-NEXT: vmovntps %ymm0, (%rdi)
|
||||
; AVX-NEXT: vmovntdq %ymm0, (%rdi)
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
@ -282,7 +282,7 @@ define void @test_nt16xi16(<16 x i16>* nocapture %ptr, <16 x i16> %X) {
|
||||
;
|
||||
; AVX-LABEL: test_nt16xi16:
|
||||
; AVX: # BB#0: # %entry
|
||||
; AVX-NEXT: vmovntps %ymm0, (%rdi)
|
||||
; AVX-NEXT: vmovntdq %ymm0, (%rdi)
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
@ -304,7 +304,7 @@ define void @test_nt8xi32(<8 x i32>* nocapture %ptr, <8 x i32> %X) {
|
||||
;
|
||||
; AVX-LABEL: test_nt8xi32:
|
||||
; AVX: # BB#0: # %entry
|
||||
; AVX-NEXT: vmovntps %ymm0, (%rdi)
|
||||
; AVX-NEXT: vmovntdq %ymm0, (%rdi)
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
@ -404,8 +404,8 @@ define void @test_nt64xi8(<64 x i8>* nocapture %ptr, <64 x i8> %X) {
|
||||
;
|
||||
; AVX-LABEL: test_nt64xi8:
|
||||
; AVX: # BB#0: # %entry
|
||||
; AVX-NEXT: vmovntps %ymm0, (%rdi)
|
||||
; AVX-NEXT: vmovntps %ymm1, 32(%rdi)
|
||||
; AVX-NEXT: vmovntdq %ymm0, (%rdi)
|
||||
; AVX-NEXT: vmovntdq %ymm1, 32(%rdi)
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
@ -435,8 +435,8 @@ define void @test_nt32xi16(<32 x i16>* nocapture %ptr, <32 x i16> %X) {
|
||||
;
|
||||
; AVX-LABEL: test_nt32xi16:
|
||||
; AVX: # BB#0: # %entry
|
||||
; AVX-NEXT: vmovntps %ymm0, (%rdi)
|
||||
; AVX-NEXT: vmovntps %ymm1, 32(%rdi)
|
||||
; AVX-NEXT: vmovntdq %ymm0, (%rdi)
|
||||
; AVX-NEXT: vmovntdq %ymm1, 32(%rdi)
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
@ -466,8 +466,8 @@ define void @test_nt16xi32(<16 x i32>* nocapture %ptr, <16 x i32> %X) {
|
||||
;
|
||||
; AVX-LABEL: test_nt16xi32:
|
||||
; AVX: # BB#0: # %entry
|
||||
; AVX-NEXT: vmovntps %ymm0, (%rdi)
|
||||
; AVX-NEXT: vmovntps %ymm1, 32(%rdi)
|
||||
; AVX-NEXT: vmovntdq %ymm0, (%rdi)
|
||||
; AVX-NEXT: vmovntdq %ymm1, 32(%rdi)
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
|
Loading…
x
Reference in New Issue
Block a user