diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 6c2185eafe8..fb8d32fe39b 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -503,6 +503,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32; case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; + case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32; + case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32; + case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32; + case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32; + case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32; + case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32; + case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32; case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index fb25acaa1cd..eb10541b6f2 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -977,47 +977,27 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>; defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; -defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", - [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))] ->; -defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", - [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))] ->; -defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", - [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))] ->; -defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", - [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))] ->; +defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; +defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; +defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; +defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; -defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", - [(set i32:$dst, (srl i32:$src0, i32:$src1))] ->; +defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">; -defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", - [(set i32:$dst, (sra i32:$src0, i32:$src1))] ->; +defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; let hasPostISelHook = 1 in { -defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", - [(set i32:$dst, (shl i32:$src0, i32:$src1))] ->; +defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; } defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; -defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", - [(set i32:$dst, (and i32:$src0, i32:$src1))] ->; -defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", - [(set i32:$dst, (or i32:$src0, i32:$src1))] ->; -defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", - [(set i32:$dst, (xor i32:$src0, i32:$src1))] ->; +defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>; +defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>; +defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>; } // End isCommutable = 1 @@ -1215,10 +1195,18 @@ def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", } // End Uses = [SCC] } // End Defs = [SCC] -def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", []>; -def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", []>; -def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", []>; -def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", []>; +def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", + [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))] +>; +def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", + [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))] +>; +def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", + [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))] +>; +def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", + [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))] +>; def S_CSELECT_B32 : SOP2 < 0x0000000a, (outs SReg_32:$dst), @@ -1228,7 +1216,9 @@ def S_CSELECT_B32 : SOP2 < def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; -def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", []>; +def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", + [(set i32:$dst, (and i32:$src0, i32:$src1))] +>; def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", [(set i64:$dst, (and i64:$src0, i64:$src1))] @@ -1239,13 +1229,23 @@ def : Pat < (S_AND_B64 $src0, $src1) >; -def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", []>; -def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", []>; +def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", + [(set i32:$dst, (or i32:$src0, i32:$src1))] +>; + +def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", + [] +>; + def : Pat < (i1 (or i1:$src0, i1:$src1)), (S_OR_B64 $src0, $src1) >; -def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", []>; + +def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", + [(set i32:$dst, (xor i32:$src0, i32:$src1))] +>; + def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", [(set i1:$dst, (xor i1:$src0, i1:$src1))] >; diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/R600/llvm.AMDGPU.imax.ll index 1336f4eeeed..01c9f435b9f 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imax.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imax.ll @@ -1,12 +1,23 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s -;CHECK: V_MAX_I32_e32 - -define void @main(i32 %p0, i32 %p1) #0 { +; SI-LABEL: @vector_imax +; SI: V_MAX_I32_e32 +define void @vector_imax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 { main_body: - %0 = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1) - %1 = bitcast i32 %0 to float - call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1) + %load = load i32 addrspace(1)* %in, align 4 + %max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %load) + %bc = bitcast i32 %max to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc) + ret void +} + +; SI-LABEL: @scalar_imax +; SI: S_MAX_I32 +define void @scalar_imax(i32 %p0, i32 %p1) #0 { +entry: + %max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1) + %bc = bitcast i32 %max to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc) ret void } @@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.imax(i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) -attributes #0 = { "ShaderType"="0" } -attributes #1 = { readnone } +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/R600/llvm.AMDGPU.imin.ll index 3435ea471e4..565bf344408 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.imin.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.imin.ll @@ -1,12 +1,23 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s -;CHECK: V_MIN_I32_e32 - -define void @main(i32 %p0, i32 %p1) #0 { +; SI-LABEL: @vector_imin +; SI: V_MIN_I32_e32 +define void @vector_imin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 { main_body: - %0 = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1) - %1 = bitcast i32 %0 to float - call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1) + %load = load i32 addrspace(1)* %in, align 4 + %min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %load) + %bc = bitcast i32 %min to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc) + ret void +} + +; SI-LABEL: @scalar_imin +; SI: S_MIN_I32 +define void @scalar_imin(i32 %p0, i32 %p1) #0 { +entry: + %min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1) + %bc = bitcast i32 %min to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc) ret void } @@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.imin(i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) -attributes #0 = { "ShaderType"="0" } -attributes #1 = { readnone } +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll index 4cfa133208e..c3e1cfe9019 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umax.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll @@ -1,12 +1,23 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s -;CHECK: V_MAX_U32_e32 - -define void @main(i32 %p0, i32 %p1) #0 { +; SI-LABEL: @vector_umax +; SI: V_MAX_U32_e32 +define void @vector_umax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 { main_body: - %0 = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1) - %1 = bitcast i32 %0 to float - call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1) + %load = load i32 addrspace(1)* %in, align 4 + %max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %load) + %bc = bitcast i32 %max to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc) + ret void +} + +; SI-LABEL: @scalar_umax +; SI: S_MAX_U32 +define void @scalar_umax(i32 %p0, i32 %p1) #0 { +entry: + %max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1) + %bc = bitcast i32 %max to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc) ret void } @@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.umax(i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) -attributes #0 = { "ShaderType"="0" } -attributes #1 = { readnone } +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll index 14af0519bc9..460a7b2d425 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.umin.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll @@ -1,12 +1,23 @@ -;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s -;CHECK: V_MIN_U32_e32 - -define void @main(i32 %p0, i32 %p1) #0 { +; SI-LABEL: @vector_umin +; SI: V_MIN_U32_e32 +define void @vector_umin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 { main_body: - %0 = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1) - %1 = bitcast i32 %0 to float - call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %1, float %1, float %1, float %1) + %load = load i32 addrspace(1)* %in, align 4 + %min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %load) + %bc = bitcast i32 %min to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc) + ret void +} + +; SI-LABEL: @scalar_umin +; SI: S_MIN_U32 +define void @scalar_umin(i32 %p0, i32 %p1) #0 { +entry: + %min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1) + %bc = bitcast i32 %min to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc) ret void } @@ -15,7 +26,7 @@ declare i32 @llvm.AMDGPU.umin(i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) -attributes #0 = { "ShaderType"="0" } -attributes #1 = { readnone } +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll index 35d23b3d27a..35fc8b33e0b 100644 --- a/test/CodeGen/R600/or.ll +++ b/test/CodeGen/R600/or.ll @@ -39,6 +39,23 @@ define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) ret void } +; SI-CHECK-LABEL: @scalar_or_i32 +; SI-CHECK: S_OR_B32 +define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %or = or i32 %a, %b + store i32 %or, i32 addrspace(1)* %out + ret void +} + +; SI-CHECK-LABEL: @vector_or_i32 +; SI-CHECK: V_OR_B32_e32 v{{[0-9]}} +define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) { + %loada = load i32 addrspace(1)* %a + %or = or i32 %loada, %b + store i32 %or, i32 addrspace(1)* %out + ret void +} + ; EG-CHECK-LABEL: @or_i64 ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y ; EG-CHECK-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z diff --git a/test/CodeGen/R600/trunc-store-i1.ll b/test/CodeGen/R600/trunc-store-i1.ll index c3f534ffed5..a88894325b6 100644 --- a/test/CodeGen/R600/trunc-store-i1.ll +++ b/test/CodeGen/R600/trunc-store-i1.ll @@ -3,7 +3,8 @@ ; SI-LABEL: @global_truncstore_i32_to_i1 ; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]], -; SI: V_AND_B32_e64 [[VREG:v[0-9]+]], 1, [[LOAD]], 0, 0, 0, 0 +; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1 +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]] ; SI: BUFFER_STORE_BYTE [[VREG]], define void @global_truncstore_i32_to_i1(i1 addrspace(1)* %out, i32 %val) nounwind { %trunc = trunc i32 %val to i1 @@ -21,7 +22,8 @@ define void @global_truncstore_i64_to_i1(i1 addrspace(1)* %out, i64 %val) nounwi ; SI-LABEL: @global_truncstore_i16_to_i1 ; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]], -; SI: V_AND_B32_e64 [[VREG:v[0-9]+]], 1, [[LOAD]], 0, 0, 0, 0 +; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1 +; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]] ; SI: BUFFER_STORE_BYTE [[VREG]], define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind { %trunc = trunc i16 %val to i1 diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll index c12b0c1ce2c..49ed12da242 100644 --- a/test/CodeGen/R600/xor.ll +++ b/test/CodeGen/R600/xor.ll @@ -54,3 +54,21 @@ define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float ad store float %result, float addrspace(1)* %out ret void } + +; SI-CHECK-LABEL: @vector_xor_i32 +; SI-CHECK: V_XOR_B32_e32 +define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) { + %a = load i32 addrspace(1)* %in0 + %b = load i32 addrspace(1)* %in1 + %result = xor i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; SI-CHECK-LABEL: @scalar_xor_i32 +; SI-CHECK: S_XOR_B32 +define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %result = xor i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +}