From 53ea122b3dee94d0e3e9632d090428423e6802af Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 13 Feb 2016 01:19:56 +0000 Subject: [PATCH] AMDGPU: Add intrinsics for sin/cos These provide direct access to the hardware instruction without the unit conversion that the llvm.sin/llvm.cos lowering requires. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260782 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsAMDGPU.td | 12 ++++++++++++ lib/Target/AMDGPU/SIISelLowering.cpp | 16 ++++++++++++++++ lib/Target/AMDGPU/SIInstructions.td | 3 ++- test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll | 15 +++++++++++++++ test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll | 17 +++++++++++++++++ test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll | 15 +++++++++++++++ 6 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 64f6fea87a9..d2e76b449e0 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -92,6 +92,18 @@ def int_amdgcn_trig_preop : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem] >; +def int_amdgcn_sin : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + +def int_amdgcn_cos : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + +def int_amdgcn_log_clamp : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + def int_amdgcn_rcp : Intrinsic< [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] >; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 298100553e0..0f464e28f84 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1535,6 +1535,22 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), Glue); } + case Intrinsic::amdgcn_sin: + return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1)); + + case Intrinsic::amdgcn_cos: + return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1)); + + case Intrinsic::amdgcn_log_clamp: { + if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) + return SDValue(); + + DiagnosticInfoUnsupported BadIntrin( + *MF.getFunction(), "intrinsic not supported on subtarget", + DL.getDebugLoc()); + DAG.getContext()->diagnose(BadIntrin); + return DAG.getUNDEF(VT); + } case Intrinsic::amdgcn_ldexp: return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1), Op.getOperand(2)); diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 06113273594..251f92988fb 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1349,7 +1349,8 @@ let SubtargetPredicate = isSICI in { let SchedRW = [WriteQuarterRate32] in { defm V_MOV_FED_B32 : VOP1InstSI , "v_mov_fed_b32", VOP_I32_I32>; -defm V_LOG_CLAMP_F32 : VOP1InstSI , "v_log_clamp_f32", VOP_F32_F32>; +defm V_LOG_CLAMP_F32 : VOP1InstSI , "v_log_clamp_f32", + VOP_F32_F32, int_amdgcn_log_clamp>; defm V_RCP_CLAMP_F32 : VOP1InstSI , "v_rcp_clamp_f32", VOP_F32_F32>; defm V_RCP_LEGACY_F32 : VOP1InstSI , "v_rcp_legacy_f32", VOP_F32_F32>; defm V_RSQ_CLAMP_F32 : VOP1InstSI , "v_rsq_clamp_f32", diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll new file mode 100644 index 00000000000..f6495d8155f --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s + +declare float @llvm.amdgcn.cos.f32(float) #0 + +; GCN-LABEL: {{^}}v_cos_f32: +; GCN: v_cos_f32_e32 {{v[0-9]+}}, {{s[0-9]+}} 
+define void @v_cos_f32(float addrspace(1)* %out, float %src) #1 { + %cos = call float @llvm.amdgcn.cos.f32(float %src) #0 + store float %cos, float addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll new file mode 100644 index 00000000000..f78257f1d22 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERR %s + +; ERR: intrinsic not supported on subtarget + +declare float @llvm.amdgcn.log.clamp.f32(float) #0 + +; GCN-LABEL: {{^}}v_log_clamp_f32: +; GCN: v_log_clamp_f32_e32 {{v[0-9]+}}, {{s[0-9]+}} +define void @v_log_clamp_f32(float addrspace(1)* %out, float %src) #1 { + %log.clamp = call float @llvm.amdgcn.log.clamp.f32(float %src) #0 + store float %log.clamp, float addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll new file mode 100644 index 00000000000..9dc4554b88a --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s + +declare float @llvm.amdgcn.sin.f32(float) #0 + +; GCN-LABEL: {{^}}v_sin_f32: +; GCN: v_sin_f32_e32 {{v[0-9]+}}, {{s[0-9]+}} +define void @v_sin_f32(float addrspace(1)* %out, float %src) #1 { + %sin = call float @llvm.amdgcn.sin.f32(float %src) #0 + store float %sin, float addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind }