R600/SI: Move instruction patterns to scalar versions.

Some of them also had the pattern on both the scalar and vector
versions, so this removes the duplication.

llvm-svn: 204492
This commit is contained in:
Matt Arsenault 2014-03-21 18:01:18 +00:00
parent dba5764b6a
commit f0af6362fd
9 changed files with 164 additions and 76 deletions

View File

@ -503,6 +503,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32;
case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;

View File

@ -977,47 +977,27 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
>;
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
[(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
>;
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
[(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
>;
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
[(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
>;
defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
[(set i32:$dst, (srl i32:$src0, i32:$src1))]
>;
defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
[(set i32:$dst, (sra i32:$src0, i32:$src1))]
>;
defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
let hasPostISelHook = 1 in {
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
[(set i32:$dst, (shl i32:$src0, i32:$src1))]
>;
defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
}
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
[(set i32:$dst, (and i32:$src0, i32:$src1))]
>;
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
[(set i32:$dst, (or i32:$src0, i32:$src1))]
>;
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>;
defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>;
defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>;
} // End isCommutable = 1
@ -1215,10 +1195,18 @@ def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
} // End Uses = [SCC]
} // End Defs = [SCC]
def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>;
def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>;
def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>;
def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>;
// Scalar 32-bit integer min/max instructions. Each def carries the selection
// pattern for the matching AMDGPU min/max node on i32 operands, writing $dst
// from $src0 and $src1.

// Signed minimum.
def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32",
[(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
>;
// Unsigned minimum.
def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32",
[(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
>;
// Signed maximum.
def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32",
[(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
>;
// Unsigned maximum.
def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32",
[(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
>;
def S_CSELECT_B32 : SOP2 <
0x0000000a, (outs SReg_32:$dst),
@ -1228,7 +1216,9 @@ def S_CSELECT_B32 : SOP2 <
def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>;
def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32",
[(set i32:$dst, (and i32:$src0, i32:$src1))]
>;
def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
[(set i64:$dst, (and i64:$src0, i64:$src1))]
@ -1239,13 +1229,23 @@ def : Pat <
(S_AND_B64 $src0, $src1)
>;
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>;
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>;
def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
[(set i32:$dst, (or i32:$src0, i32:$src1))]
>;
def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
[]
>;
def : Pat <
(i1 (or i1:$src0, i1:$src1)),
(S_OR_B64 $src0, $src1)
>;
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>;
def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
[(set i32:$dst, (xor i32:$src0, i32:$src1))]
>;
def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
[(set i1:$dst, (xor i1:$src0, i1:$src1))]
>;

View File

@ -1,12 +1,23 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
;CHECK: V_MAX_I32_e32
define void @main(i32 %p0, i32 %p1) #0 {
; SI-LABEL: @vector_imax
; SI: V_MAX_I32_e32
define void @vector_imax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%0 = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1)
%1 = bitcast i32 %0 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
%load = load i32 addrspace(1)* %in, align 4
%max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %load)
%bc = bitcast i32 %max to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
ret void
}
; Signed max where both operands are plain i32 function arguments. The check
; lines below expect the scalar S_MAX_I32 instruction to be selected.
; SI-LABEL: @scalar_imax
; SI: S_MAX_I32
define void @scalar_imax(i32 %p0, i32 %p1) #0 {
entry:
%max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1)
; The export intrinsic takes float operands, so the i32 result is bitcast first.
%bc = bitcast i32 %max to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
ret void
}
@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.imax(i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!0 = metadata !{metadata !"const", null, i32 1}

View File

@ -1,12 +1,23 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
;CHECK: V_MIN_I32_e32
define void @main(i32 %p0, i32 %p1) #0 {
; SI-LABEL: @vector_imin
; SI: V_MIN_I32_e32
define void @vector_imin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%0 = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1)
%1 = bitcast i32 %0 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
%load = load i32 addrspace(1)* %in, align 4
%min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %load)
%bc = bitcast i32 %min to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
ret void
}
; Signed min where both operands are plain i32 function arguments. The check
; lines below expect the scalar S_MIN_I32 instruction to be selected.
; SI-LABEL: @scalar_imin
; SI: S_MIN_I32
define void @scalar_imin(i32 %p0, i32 %p1) #0 {
entry:
%min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1)
; The export intrinsic takes float operands, so the i32 result is bitcast first.
%bc = bitcast i32 %min to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
ret void
}
@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.imin(i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!0 = metadata !{metadata !"const", null, i32 1}

View File

@ -1,12 +1,23 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
;CHECK: V_MAX_U32_e32
define void @main(i32 %p0, i32 %p1) #0 {
; SI-LABEL: @vector_umax
; SI: V_MAX_U32_e32
define void @vector_umax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%0 = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1)
%1 = bitcast i32 %0 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
%load = load i32 addrspace(1)* %in, align 4
%max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %load)
%bc = bitcast i32 %max to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
ret void
}
; Unsigned max where both operands are plain i32 function arguments. The check
; lines below expect the scalar S_MAX_U32 instruction to be selected.
; SI-LABEL: @scalar_umax
; SI: S_MAX_U32
define void @scalar_umax(i32 %p0, i32 %p1) #0 {
entry:
%max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1)
; The export intrinsic takes float operands, so the i32 result is bitcast first.
%bc = bitcast i32 %max to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
ret void
}
@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.umax(i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!0 = metadata !{metadata !"const", null, i32 1}

View File

@ -1,12 +1,23 @@
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
;CHECK: V_MIN_U32_e32
define void @main(i32 %p0, i32 %p1) #0 {
; SI-LABEL: @vector_umin
; SI: V_MIN_U32_e32
define void @vector_umin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
main_body:
%0 = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1)
%1 = bitcast i32 %0 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1)
%load = load i32 addrspace(1)* %in, align 4
%min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %load)
%bc = bitcast i32 %min to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
ret void
}
; Unsigned min where both operands are plain i32 function arguments. The check
; lines below expect the scalar S_MIN_U32 instruction to be selected.
; SI-LABEL: @scalar_umin
; SI: S_MIN_U32
define void @scalar_umin(i32 %p0, i32 %p1) #0 {
entry:
%min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1)
; The export intrinsic takes float operands, so the i32 result is bitcast first.
%bc = bitcast i32 %min to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
ret void
}
@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.umin(i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
!0 = metadata !{metadata !"const", null, i32 1}

View File

@ -39,6 +39,23 @@ define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in)
ret void
}
; 32-bit OR of two i32 function arguments, stored through the addrspace(1)
; pointer %out. The check lines below expect the scalar S_OR_B32 instruction.
; SI-CHECK-LABEL: @scalar_or_i32
; SI-CHECK: S_OR_B32
define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%or = or i32 %a, %b
store i32 %or, i32 addrspace(1)* %out
ret void
}
; 32-bit OR where one operand is loaded from the addrspace(1) pointer %a and
; the other is the i32 argument %b. The check lines below expect the vector
; V_OR_B32_e32 instruction, followed by a single-digit v register.
; SI-CHECK-LABEL: @vector_or_i32
; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}
define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
%loada = load i32 addrspace(1)* %a
%or = or i32 %loada, %b
store i32 %or, i32 addrspace(1)* %out
ret void
}
; EG-CHECK-LABEL: @or_i64
; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z

View File

@ -3,7 +3,8 @@
; SI-LABEL: @global_truncstore_i32_to_i1
; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]],
; SI: V_AND_B32_e64 [[VREG:v[0-9]+]], 1, [[LOAD]], 0, 0, 0, 0
; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]]
; SI: BUFFER_STORE_BYTE [[VREG]],
define void @global_truncstore_i32_to_i1(i1 addrspace(1)* %out, i32 %val) nounwind {
%trunc = trunc i32 %val to i1
@ -21,7 +22,8 @@ define void @global_truncstore_i64_to_i1(i1 addrspace(1)* %out, i64 %val) nounwi
; SI-LABEL: @global_truncstore_i16_to_i1
; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]],
; SI: V_AND_B32_e64 [[VREG:v[0-9]+]], 1, [[LOAD]], 0, 0, 0, 0
; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1
; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]]
; SI: BUFFER_STORE_BYTE [[VREG]],
define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind {
%trunc = trunc i16 %val to i1

View File

@ -54,3 +54,21 @@ define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float ad
store float %result, float addrspace(1)* %out
ret void
}
; 32-bit XOR where both operands are loaded from addrspace(1) pointers and the
; result is stored to %out. The check lines below expect the vector
; V_XOR_B32_e32 instruction.
; SI-CHECK-LABEL: @vector_xor_i32
; SI-CHECK: V_XOR_B32_e32
define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
%a = load i32 addrspace(1)* %in0
%b = load i32 addrspace(1)* %in1
%result = xor i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
}
; 32-bit XOR of two i32 function arguments, stored through the addrspace(1)
; pointer %out. The check lines below expect the scalar S_XOR_B32 instruction.
; SI-CHECK-LABEL: @scalar_xor_i32
; SI-CHECK: S_XOR_B32
define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
%result = xor i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
ret void
}