mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-28 22:20:37 +00:00
R600/SI: Use S_LOAD_DWORD instructions for v8i32 and v16i32
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193212 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2b3ea3cdad
commit
f9e5c39811
@ -232,6 +232,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) {
|
||||
} else if (AMDGPU::VReg_256RegClass.contains(reg)) {
|
||||
isSGPR = false;
|
||||
width = 8;
|
||||
} else if (AMDGPU::SReg_512RegClass.contains(reg)) {
|
||||
isSGPR = true;
|
||||
width = 16;
|
||||
} else if (AMDGPU::VReg_512RegClass.contains(reg)) {
|
||||
isSGPR = false;
|
||||
width = 16;
|
||||
|
@ -1820,7 +1820,10 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
|
||||
defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MUBUF Patterns
|
||||
|
@ -159,11 +159,11 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
|
||||
(add SGPR_64Regs, VCCReg, EXECReg)
|
||||
>;
|
||||
|
||||
def SReg_128 : RegisterClass<"AMDGPU", [i128], 128, (add SGPR_128)>;
|
||||
def SReg_128 : RegisterClass<"AMDGPU", [i128, v4i32], 128, (add SGPR_128)>;
|
||||
|
||||
def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
|
||||
|
||||
def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
|
||||
def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 512, (add SGPR_512)>;
|
||||
|
||||
// Register class for all vector registers (VGPRs + Interpolation Registers)
|
||||
def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>;
|
||||
|
@ -10,11 +10,16 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: @fneg_v4
|
||||
; CHECK: -PV
|
||||
; CHECK: -T
|
||||
; CHECK: -PV
|
||||
; CHECK: -PV
|
||||
; R600-CHECK-LABEL: @fneg_v4
|
||||
; R600-CHECK: -PV
|
||||
; R600-CHECK: -T
|
||||
; R600-CHECK: -PV
|
||||
; R600-CHECK: -PV
|
||||
; SI-CHECK-LABEL: @fneg_v4
|
||||
; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
; SI-CHECK: V_ADD_F32_e64 VGPR{{[0-9]}}, SGPR{{[0-9]}}, 0, 0, 0, 0, 1
|
||||
define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
|
||||
entry:
|
||||
%0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
|
||||
|
Loading…
Reference in New Issue
Block a user