AMDGPU: Add LLVM intrinsics for SAD-related instructions.

Differential Revision: http://reviews.llvm.org/D23133

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@278354 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Wei Ding 2016-08-11 16:33:53 +00:00
parent a6a3520f08
commit 9bcebab62b
10 changed files with 212 additions and 13 deletions

View File

@ -502,12 +502,40 @@ def int_amdgcn_lerp :
GCCBuiltin<"__builtin_amdgcn_lerp">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// llvm.amdgcn.sad.u8: takes three i32 operands and returns one i32.
// Declared IntrNoMem and tied to the builtin name __builtin_amdgcn_sad_u8.
def int_amdgcn_sad_u8 :
GCCBuiltin<"__builtin_amdgcn_sad_u8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// llvm.amdgcn.msad.u8: takes three i32 operands and returns one i32.
// Declared IntrNoMem and tied to the builtin name __builtin_amdgcn_msad_u8.
def int_amdgcn_msad_u8 :
GCCBuiltin<"__builtin_amdgcn_msad_u8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// llvm.amdgcn.sad.hi.u8: takes three i32 operands and returns one i32.
// Declared IntrNoMem and tied to the builtin name __builtin_amdgcn_sad_hi_u8.
def int_amdgcn_sad_hi_u8 :
GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// llvm.amdgcn.sad.u16: takes three i32 operands and returns one i32.
// Declared IntrNoMem and tied to the builtin name __builtin_amdgcn_sad_u16.
def int_amdgcn_sad_u16 :
GCCBuiltin<"__builtin_amdgcn_sad_u16">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// llvm.amdgcn.qsad.pk.u16.u8: takes three i32 operands and returns one i32.
// Declared IntrNoMem and tied to the builtin name
// __builtin_amdgcn_qsad_pk_u16_u8.
// NOTE(review): every operand and the result are declared as i32 here; confirm
// against the ISA manual that the hardware instruction really uses 32-bit
// source and destination operands.
def int_amdgcn_qsad_pk_u16_u8 :
GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// llvm.amdgcn.mqsad.pk.u16.u8: takes three i32 operands and returns one i32.
// Declared IntrNoMem and tied to the builtin name
// __builtin_amdgcn_mqsad_pk_u16_u8.
// NOTE(review): every operand and the result are declared as i32 here; confirm
// against the ISA manual that the hardware instruction really uses 32-bit
// source and destination operands.
def int_amdgcn_mqsad_pk_u16_u8 :
GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
// llvm.amdgcn.mqsad.u32.u8: takes three i32 operands and returns one i32.
// Declared IntrNoMem and tied to the builtin name
// __builtin_amdgcn_mqsad_u32_u8.
// NOTE(review): every operand and the result are declared as i32 here; confirm
// against the ISA manual that the hardware instruction really uses 32-bit
// source and destination operands.
def int_amdgcn_mqsad_u32_u8 :
GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_amdgcn_icmp :
Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrConvergent]>;
def int_amdgcn_fcmp :
Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrConvergent]>;
//===----------------------------------------------------------------------===//

View File

@ -50,15 +50,15 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
// VOP3 Instructions
//===----------------------------------------------------------------------===//
defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
VOP_I32_I32_I32
>;
defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
VOP_I32_I32_I32
>;
defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
VOP_I32_I32_I32
>;
// v_qsad_pk_u16_u8: VOP3 instruction with the VOP_I32_I32_I32_I32 profile,
// matched from int_amdgcn_qsad_pk_u16_u8. vop3 opcodes are 0x172 and 0x1e5
// (presumably the SI/CI and VI encodings -- confirm against the vop3 class).
defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x172, 0x1e5>, "v_qsad_pk_u16_u8",
VOP_I32_I32_I32_I32, int_amdgcn_qsad_pk_u16_u8>;
// v_mqsad_u32_u8: VOP3 instruction with the VOP_I32_I32_I32_I32 profile,
// matched from int_amdgcn_mqsad_u32_u8. vop3 opcodes are 0x175 and 0x1e7
// (presumably the SI/CI and VI encodings -- confirm against the vop3 class).
// The first opcode must be 0x175; 0x174 is not this instruction's encoding.
defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175, 0x1e7>, "v_mqsad_u32_u8",
  VOP_I32_I32_I32_I32, int_amdgcn_mqsad_u32_u8>;
let isCommutable = 1 in {
defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",

View File

@ -1603,9 +1603,15 @@ defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32",
VOP_I32_I32_I32_I32, AMDGPUumed3
>;
//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
// v_sad_u8: VOP3 instruction with the VOP_I32_I32_I32_I32 profile, matched
// from int_amdgcn_sad_u8. vop3 opcodes are 0x15a and 0x1d9.
defm V_SAD_U8 : VOP3Inst <vop3<0x15a, 0x1d9>, "v_sad_u8",
  VOP_I32_I32_I32_I32, int_amdgcn_sad_u8
>;
// v_sad_hi_u8: VOP3 instruction with the VOP_I32_I32_I32_I32 profile, matched
// from int_amdgcn_sad_hi_u8. vop3 opcodes are 0x15b and 0x1da.
defm V_SAD_HI_U8 : VOP3Inst <vop3<0x15b, 0x1da>, "v_sad_hi_u8",
  VOP_I32_I32_I32_I32, int_amdgcn_sad_hi_u8
>;
// v_sad_u16: VOP3 instruction with the VOP_I32_I32_I32_I32 profile, matched
// from int_amdgcn_sad_u16. vop3 opcodes are 0x15c and 0x1db.
defm V_SAD_U16 : VOP3Inst <vop3<0x15c, 0x1db>, "v_sad_u16",
VOP_I32_I32_I32_I32, int_amdgcn_sad_u16>;
// v_sad_u32: VOP3 instruction with the VOP_I32_I32_I32_I32 profile and vop3
// opcodes 0x15d and 0x1dc. No selection pattern/intrinsic argument is passed
// here, unlike the neighbouring v_sad_* definitions.
defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32",
VOP_I32_I32_I32_I32
>;
@ -1707,8 +1713,12 @@ defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
} // End SchedRW = [WriteDouble]
} // End isCommutable = 1, Uses = [VCC, EXEC]
//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>;
//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>;
// v_msad_u8: VOP3 instruction with the VOP_I32_I32_I32_I32 profile, matched
// from int_amdgcn_msad_u8. vop3 opcodes are 0x171 and 0x1e4.
defm V_MSAD_U8 : VOP3Inst <vop3<0x171, 0x1e4>, "v_msad_u8",
VOP_I32_I32_I32_I32, int_amdgcn_msad_u8>;
// v_mqsad_pk_u16_u8: VOP3 instruction with the VOP_I32_I32_I32_I32 profile,
// matched from int_amdgcn_mqsad_pk_u16_u8. vop3 opcodes are 0x173 and 0x1e6.
defm V_MQSAD_PK_U16_U8 : VOP3Inst <vop3<0x173, 0x1e6>, "v_mqsad_pk_u16_u8",
VOP_I32_I32_I32_I32, int_amdgcn_mqsad_pk_u16_u8>;
//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>;
let SchedRW = [WriteDouble] in {

View File

@ -0,0 +1,23 @@
; Instruction-selection test for llvm.amdgcn.mqsad.pk.u16.u8. llc is invoked
; for amdgcn twice: once with the default CPU and once with -mcpu=fiji. Both
; invocations share the same check prefix.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.mqsad.pk.u16.u8(i32, i32, i32) #0
; Case 1: the second and third operands are the constant 100; the first is a
; function argument. The check expects an SGPR first source and VGPRs elsewhere.
; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
; GCN: v_mqsad_pk_u16_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_mqsad_pk_u16_u8(i32 addrspace(1)* %out, i32 %src) #1 {
%result= call i32 @llvm.amdgcn.mqsad.pk.u16.u8(i32 %src, i32 100, i32 100) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Case 2: all three operands are function arguments.
; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8_non_immediate:
; GCN: v_mqsad_pk_u16_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_mqsad_pk_u16_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) #1 {
%result= call i32 @llvm.amdgcn.mqsad.pk.u16.u8(i32 %src, i32 %a, i32 %b) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Group #0 is used by the intrinsic declaration and its call sites; group #1 by
; the two test functions.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -0,0 +1,23 @@
; Instruction-selection test for llvm.amdgcn.mqsad.u32.u8. llc is invoked for
; amdgcn twice: once with the default CPU and once with -mcpu=fiji. Both
; invocations share the same check prefix.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.mqsad.u32.u8(i32, i32, i32) #0
; Case 1: the second and third operands are the constant 100; the first is a
; function argument. The check expects an SGPR first source and VGPRs elsewhere.
; GCN-LABEL: {{^}}v_mqsad_u32_u8:
; GCN: v_mqsad_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_mqsad_u32_u8(i32 addrspace(1)* %out, i32 %src) #1 {
%result= call i32 @llvm.amdgcn.mqsad.u32.u8(i32 %src, i32 100, i32 100) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Case 2: all three operands are function arguments.
; GCN-LABEL: {{^}}v_mqsad_u32_u8_non_immediate:
; GCN: v_mqsad_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_mqsad_u32_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) #1 {
%result= call i32 @llvm.amdgcn.mqsad.u32.u8(i32 %src, i32 %a, i32 %b) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Group #0 is used by the intrinsic declaration and its call sites; group #1 by
; the two test functions.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -0,0 +1,23 @@
; Instruction-selection test for llvm.amdgcn.msad.u8. llc is invoked for amdgcn
; twice: once with the default CPU and once with -mcpu=fiji. Both invocations
; share the same check prefix.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.msad.u8(i32, i32, i32) #0
; Case 1: the second and third operands are the constant 100; the first is a
; function argument. The check expects an SGPR first source and VGPRs elsewhere.
; GCN-LABEL: {{^}}v_msad_u8:
; GCN: v_msad_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_msad_u8(i32 addrspace(1)* %out, i32 %src) #1 {
%result= call i32 @llvm.amdgcn.msad.u8(i32 %src, i32 100, i32 100) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Case 2: all three operands are function arguments.
; GCN-LABEL: {{^}}v_msad_u8_non_immediate:
; GCN: v_msad_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_msad_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) #1 {
%result= call i32 @llvm.amdgcn.msad.u8(i32 %src, i32 %a, i32 %b) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Group #0 is used by the intrinsic declaration and its call sites; group #1 by
; the two test functions.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -0,0 +1,23 @@
; Instruction-selection test for llvm.amdgcn.qsad.pk.u16.u8. llc is invoked for
; amdgcn twice: once with the default CPU and once with -mcpu=fiji. Both
; invocations share the same check prefix.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.qsad.pk.u16.u8(i32, i32, i32) #0
; Case 1: the second and third operands are the constant 100; the first is a
; function argument. The check expects an SGPR first source and VGPRs elsewhere.
; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
; GCN: v_qsad_pk_u16_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_qsad_pk_u16_u8(i32 addrspace(1)* %out, i32 %src) #1 {
%result= call i32 @llvm.amdgcn.qsad.pk.u16.u8(i32 %src, i32 100, i32 100) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Case 2: all three operands are function arguments.
; GCN-LABEL: {{^}}v_qsad_pk_u16_u8_non_immediate:
; GCN: v_qsad_pk_u16_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_qsad_pk_u16_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) #1 {
%result= call i32 @llvm.amdgcn.qsad.pk.u16.u8(i32 %src, i32 %a, i32 %b) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Group #0 is used by the intrinsic declaration and its call sites; group #1 by
; the two test functions.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -0,0 +1,23 @@
; Instruction-selection test for llvm.amdgcn.sad.hi.u8. llc is invoked for
; amdgcn twice: once with the default CPU and once with -mcpu=fiji. Both
; invocations share the same check prefix.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.sad.hi.u8(i32, i32, i32) #0
; Case 1: the second and third operands are the constant 100; the first is a
; function argument. The check expects an SGPR first source and VGPRs elsewhere.
; GCN-LABEL: {{^}}v_sad_hi_u8:
; GCN: v_sad_hi_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_sad_hi_u8(i32 addrspace(1)* %out, i32 %src) #1 {
%result= call i32 @llvm.amdgcn.sad.hi.u8(i32 %src, i32 100, i32 100) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Case 2: all three operands are function arguments.
; GCN-LABEL: {{^}}v_sad_hi_u8_non_immediate:
; GCN: v_sad_hi_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_sad_hi_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) #1 {
%result= call i32 @llvm.amdgcn.sad.hi.u8(i32 %src, i32 %a, i32 %b) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Group #0 is used by the intrinsic declaration and its call sites; group #1 by
; the two test functions, so each group needs its own definition.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -0,0 +1,23 @@
; Instruction-selection test for llvm.amdgcn.sad.u16. llc is invoked for amdgcn
; twice: once with the default CPU and once with -mcpu=fiji. Both invocations
; share the same check prefix.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.sad.u16(i32, i32, i32) #0
; Case 1: the second and third operands are the constant 100; the first is a
; function argument. The check expects an SGPR first source and VGPRs elsewhere.
; GCN-LABEL: {{^}}v_sad_u16:
; GCN: v_sad_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_sad_u16(i32 addrspace(1)* %out, i32 %src) #1 {
%result= call i32 @llvm.amdgcn.sad.u16(i32 %src, i32 100, i32 100) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Case 2: all three operands are function arguments.
; GCN-LABEL: {{^}}v_sad_u16_non_immediate:
; GCN: v_sad_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_sad_u16_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) #1 {
%result= call i32 @llvm.amdgcn.sad.u16(i32 %src, i32 %a, i32 %b) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Group #0 is used by the intrinsic declaration and its call sites; group #1 by
; the two test functions, so each group needs its own definition.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -0,0 +1,23 @@
; Instruction-selection test for llvm.amdgcn.sad.u8. llc is invoked for amdgcn
; twice: once with the default CPU and once with -mcpu=fiji. Both invocations
; share the same check prefix.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.sad.u8(i32, i32, i32) #0
; Case 1: the second and third operands are the constant 100; the first is a
; function argument. The check expects an SGPR first source and VGPRs elsewhere.
; GCN-LABEL: {{^}}v_sad_u8:
; GCN: v_sad_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_sad_u8(i32 addrspace(1)* %out, i32 %src) #1 {
%result= call i32 @llvm.amdgcn.sad.u8(i32 %src, i32 100, i32 100) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Case 2: all three operands are function arguments.
; GCN-LABEL: {{^}}v_sad_u8_non_immediate:
; GCN: v_sad_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @v_sad_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) #1 {
%result= call i32 @llvm.amdgcn.sad.u8(i32 %src, i32 %a, i32 %b) #0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}
; Group #0 is used by the intrinsic declaration and its call sites; group #1 by
; the two test functions, so each group needs its own definition.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }