mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-13 07:50:50 +00:00
R600/SI: Move instruction patterns to scalar versions.
Some of them also had the pattern on both the scalar and the vector instruction definitions, so this removes the duplication.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204492 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d38fea31a5
commit
55d17f4842
@ -503,6 +503,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
|
||||
case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
|
||||
case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
|
||||
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
|
||||
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
|
||||
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
|
||||
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
|
||||
case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
|
||||
case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
|
||||
case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
|
||||
case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
|
||||
case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
|
||||
case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
|
||||
case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
|
||||
|
@ -977,47 +977,27 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
|
||||
|
||||
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
|
||||
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
|
||||
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
|
||||
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
|
||||
[(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
|
||||
[(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
|
||||
[(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
|
||||
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
|
||||
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
|
||||
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
|
||||
|
||||
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
|
||||
[(set i32:$dst, (srl i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
|
||||
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
|
||||
|
||||
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
|
||||
[(set i32:$dst, (sra i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
|
||||
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
|
||||
|
||||
let hasPostISelHook = 1 in {
|
||||
|
||||
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
|
||||
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
|
||||
|
||||
}
|
||||
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
|
||||
|
||||
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
|
||||
[(set i32:$dst, (and i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
|
||||
[(set i32:$dst, (or i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
|
||||
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
|
||||
>;
|
||||
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>;
|
||||
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>;
|
||||
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>;
|
||||
|
||||
} // End isCommutable = 1
|
||||
|
||||
@ -1215,10 +1195,18 @@ def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
|
||||
} // End Uses = [SCC]
|
||||
} // End Defs = [SCC]
|
||||
|
||||
def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
|
||||
def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
|
||||
def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
|
||||
def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
|
||||
def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32",
|
||||
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
|
||||
>;
|
||||
def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32",
|
||||
[(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
|
||||
>;
|
||||
def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32",
|
||||
[(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
|
||||
>;
|
||||
def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32",
|
||||
[(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
|
||||
>;
|
||||
|
||||
def S_CSELECT_B32 : SOP2 <
|
||||
0x0000000a, (outs SReg_32:$dst),
|
||||
@ -1228,7 +1216,9 @@ def S_CSELECT_B32 : SOP2 <
|
||||
|
||||
def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
|
||||
|
||||
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
|
||||
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32",
|
||||
[(set i32:$dst, (and i32:$src0, i32:$src1))]
|
||||
>;
|
||||
|
||||
def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
|
||||
[(set i64:$dst, (and i64:$src0, i64:$src1))]
|
||||
@ -1239,13 +1229,23 @@ def : Pat <
|
||||
(S_AND_B64 $src0, $src1)
|
||||
>;
|
||||
|
||||
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
|
||||
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
|
||||
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
|
||||
[(set i32:$dst, (or i32:$src0, i32:$src1))]
|
||||
>;
|
||||
|
||||
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
|
||||
[]
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i1 (or i1:$src0, i1:$src1)),
|
||||
(S_OR_B64 $src0, $src1)
|
||||
>;
|
||||
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
|
||||
|
||||
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
|
||||
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
|
||||
>;
|
||||
|
||||
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
|
||||
[(set i1:$dst, (xor i1:$src0, i1:$src1))]
|
||||
>;
|
||||
|
@ -1,12 +1,23 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
|
||||
|
||||
;CHECK: V_MAX_I32_e32
|
||||
|
||||
define void @main(i32 %p0, i32 %p1) #0 {
|
||||
; SI-LABEL: @vector_imax
|
||||
; SI: V_MAX_I32_e32
|
||||
define void @vector_imax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
|
||||
main_body:
|
||||
%0 = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1)
|
||||
%1 = bitcast i32 %0 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
|
||||
%load = load i32 addrspace(1)* %in, align 4
|
||||
%max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %load)
|
||||
%bc = bitcast i32 %max to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @scalar_imax
|
||||
; SI: S_MAX_I32
|
||||
define void @scalar_imax(i32 %p0, i32 %p1) #0 {
|
||||
entry:
|
||||
%max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1)
|
||||
%bc = bitcast i32 %max to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.imax(i32, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
attributes #1 = { readnone }
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!0 = metadata !{metadata !"const", null, i32 1}
|
||||
|
@ -1,12 +1,23 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
|
||||
|
||||
;CHECK: V_MIN_I32_e32
|
||||
|
||||
define void @main(i32 %p0, i32 %p1) #0 {
|
||||
; SI-LABEL: @vector_imin
|
||||
; SI: V_MIN_I32_e32
|
||||
define void @vector_imin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
|
||||
main_body:
|
||||
%0 = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1)
|
||||
%1 = bitcast i32 %0 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
|
||||
%load = load i32 addrspace(1)* %in, align 4
|
||||
%min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %load)
|
||||
%bc = bitcast i32 %min to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @scalar_imin
|
||||
; SI: S_MIN_I32
|
||||
define void @scalar_imin(i32 %p0, i32 %p1) #0 {
|
||||
entry:
|
||||
%min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1)
|
||||
%bc = bitcast i32 %min to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.imin(i32, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
attributes #1 = { readnone }
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!0 = metadata !{metadata !"const", null, i32 1}
|
||||
|
@ -1,12 +1,23 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
|
||||
|
||||
;CHECK: V_MAX_U32_e32
|
||||
|
||||
define void @main(i32 %p0, i32 %p1) #0 {
|
||||
; SI-LABEL: @vector_umax
|
||||
; SI: V_MAX_U32_e32
|
||||
define void @vector_umax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
|
||||
main_body:
|
||||
%0 = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1)
|
||||
%1 = bitcast i32 %0 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
|
||||
%load = load i32 addrspace(1)* %in, align 4
|
||||
%max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %load)
|
||||
%bc = bitcast i32 %max to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @scalar_umax
|
||||
; SI: S_MAX_U32
|
||||
define void @scalar_umax(i32 %p0, i32 %p1) #0 {
|
||||
entry:
|
||||
%max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1)
|
||||
%bc = bitcast i32 %max to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.umax(i32, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
attributes #1 = { readnone }
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!0 = metadata !{metadata !"const", null, i32 1}
|
||||
|
@ -1,12 +1,23 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
|
||||
|
||||
;CHECK: V_MIN_U32_e32
|
||||
|
||||
define void @main(i32 %p0, i32 %p1) #0 {
|
||||
; SI-LABEL: @vector_umin
|
||||
; SI: V_MIN_U32_e32
|
||||
define void @vector_umin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
|
||||
main_body:
|
||||
%0 = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1)
|
||||
%1 = bitcast i32 %0 to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
|
||||
%load = load i32 addrspace(1)* %in, align 4
|
||||
%min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %load)
|
||||
%bc = bitcast i32 %min to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: @scalar_umin
|
||||
; SI: S_MIN_U32
|
||||
define void @scalar_umin(i32 %p0, i32 %p1) #0 {
|
||||
entry:
|
||||
%min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1)
|
||||
%bc = bitcast i32 %min to float
|
||||
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
|
||||
ret void
|
||||
}
|
||||
|
||||
@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.umin(i32, i32) #1
|
||||
|
||||
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
attributes #1 = { readnone }
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!0 = metadata !{metadata !"const", null, i32 1}
|
||||
|
@ -39,6 +39,23 @@ define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in)
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-CHECK-LABEL: @scalar_or_i32
|
||||
; SI-CHECK: S_OR_B32
|
||||
define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
%or = or i32 %a, %b
|
||||
store i32 %or, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-CHECK-LABEL: @vector_or_i32
|
||||
; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
|
||||
define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
|
||||
%loada = load i32 addrspace(1)* %a
|
||||
%or = or i32 %loada, %b
|
||||
store i32 %or, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; EG-CHECK-LABEL: @or_i64
|
||||
; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
|
||||
; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
|
||||
|
@ -3,7 +3,8 @@
|
||||
|
||||
; SI-LABEL: @global_truncstore_i32_to_i1
|
||||
; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]],
|
||||
; SI: V_AND_B32_e64 [[VREG:v[0-9]+]], 1, [[LOAD]], 0, 0, 0, 0
|
||||
; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1
|
||||
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]]
|
||||
; SI: BUFFER_STORE_BYTE [[VREG]],
|
||||
define void @global_truncstore_i32_to_i1(i1 addrspace(1)* %out, i32 %val) nounwind {
|
||||
%trunc = trunc i32 %val to i1
|
||||
@ -21,7 +22,8 @@ define void @global_truncstore_i64_to_i1(i1 addrspace(1)* %out, i64 %val) nounwi
|
||||
|
||||
; SI-LABEL: @global_truncstore_i16_to_i1
|
||||
; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]],
|
||||
; SI: V_AND_B32_e64 [[VREG:v[0-9]+]], 1, [[LOAD]], 0, 0, 0, 0
|
||||
; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1
|
||||
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]]
|
||||
; SI: BUFFER_STORE_BYTE [[VREG]],
|
||||
define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind {
|
||||
%trunc = trunc i16 %val to i1
|
||||
|
@ -54,3 +54,21 @@ define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float ad
|
||||
store float %result, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-CHECK-LABEL: @vector_xor_i32
|
||||
; SI-CHECK: V_XOR_B32_e32
|
||||
define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
|
||||
%a = load i32 addrspace(1)* %in0
|
||||
%b = load i32 addrspace(1)* %in1
|
||||
%result = xor i32 %a, %b
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-CHECK-LABEL: @scalar_xor_i32
|
||||
; SI-CHECK: S_XOR_B32
|
||||
define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
|
||||
%result = xor i32 %a, %b
|
||||
store i32 %result, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user