mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-17 23:44:43 +00:00
R600: Add support for ISD::FROUND
NOTE: This is a candidate for the 3.4 branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195878 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
60ffb59df3
commit
496dbfe7b9
@ -58,6 +58,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
||||
setOperationAction(ISD::FABS, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FRINT, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FROUND, MVT::f32, Legal);
|
||||
|
||||
// The hardware supports ROTR, but not ROTL
|
||||
setOperationAction(ISD::ROTL, MVT::i32, Expand);
|
||||
|
@ -83,3 +83,6 @@ def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
|
||||
def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
|
||||
SDTypeProfile<0, 2, []>,
|
||||
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
|
||||
|
||||
// DAG node wrapper for the "ISD::FROUND" opcode, so that selection patterns
// can match it by the name AMDGPUround.
// Type profile: one result and one operand; index 0 (the result) must be a
// floating-point type and index 1 (the operand) must have the same type.
def AMDGPUround : SDNode<"ISD::FROUND",
|
||||
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
|
||||
|
@ -1110,6 +1110,10 @@ class COS_Common <bits<11> inst> : R600_1OP <
|
||||
let Itinerary = TransALU;
|
||||
}
|
||||
|
||||
def CLAMP_R600 : CLAMP <R600_Reg32>;
|
||||
def FABS_R600 : FABS<R600_Reg32>;
|
||||
def FNEG_R600 : FNEG<R600_Reg32>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helper patterns for complex intrinsics
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1132,6 +1136,13 @@ class TGSI_LIT_Z_Common <InstR600 mul_lit, InstR600 log_clamped, InstR600 exp_ie
|
||||
(exp_ieee (mul_lit (log_clamped (MAX $src_y, (f32 ZERO))), $src_w, $src_x))
|
||||
>;
|
||||
|
||||
// FROUND pattern
// Selects (AMDGPUround f32:$x) to a three-operand CNDGE whose operands are:
//   1. ADD of (FNEG_R600 HALF) and (FRACT $x)
//   2. CEIL $x
//   3. FLOOR $x
// The CNDGE instruction is a class parameter; the instantiations in this
// change pass CNDGE_r600 and CNDGE_eg.
// NOTE(review): presumably CNDGE picks operand 2 when operand 1 is >= 0 and
// operand 3 otherwise, and FRACT is x - floor(x) -- confirm against the ISA
// documentation. If so, a negative halfway input such as -2.5 yields
// FRACT = 0.5, a zero condition, and CEIL (-2.0) is selected, whereas
// llvm.round is specified to round halfway cases away from zero (-3.0).
// TODO: verify and add a test covering negative halfway values.
|
||||
class FROUNDPat<Instruction CNDGE> : Pat <
|
||||
(AMDGPUround f32:$x),
|
||||
(CNDGE (ADD (FNEG_R600 (f32 HALF)), (FRACT $x)), (CEIL $x), (FLOOR $x))
|
||||
>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// R600 / R700 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -1173,6 +1184,7 @@ let Predicates = [isR600] in {
|
||||
def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>;
|
||||
|
||||
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>;
|
||||
def : FROUNDPat <CNDGE_r600>;
|
||||
|
||||
def R600_ExportSwz : ExportSwzInst {
|
||||
let Word1{20-17} = 0; // BURST_COUNT
|
||||
@ -1726,6 +1738,8 @@ def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET",
|
||||
// SHA-256 Patterns
|
||||
def : SHA256MaPattern <BFI_INT_eg, XOR_INT>;
|
||||
|
||||
def : FROUNDPat <CNDGE_eg>;
|
||||
|
||||
def EG_ExportSwz : ExportSwzInst {
|
||||
let Word1{19-16} = 0; // BURST_COUNT
|
||||
let Word1{20} = 0; // VALID_PIXEL_MODE
|
||||
@ -2090,10 +2104,6 @@ def TXD_SHADOW: InstR600 <
|
||||
} // End isPseudo = 1
|
||||
} // End usesCustomInserter = 1
|
||||
|
||||
def CLAMP_R600 : CLAMP <R600_Reg32>;
|
||||
def FABS_R600 : FABS<R600_Reg32>;
|
||||
def FNEG_R600 : FNEG<R600_Reg32>;
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Return instruction
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
41
test/CodeGen/R600/llvm.round.ll
Normal file
41
test/CodeGen/R600/llvm.round.ll
Normal file
@ -0,0 +1,41 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
|
||||
|
||||
; FUNC-LABEL: @f32
|
||||
; R600: FRACT
|
||||
; R600-DAG: ADD
|
||||
; R600-DAG: CEIL
|
||||
; R600-DAG: FLOOR
|
||||
; R600: CNDGE
|
||||
define void @f32(float addrspace(1)* %out, float %in) {
|
||||
entry:
|
||||
%0 = call float @llvm.round.f32(float %in)
|
||||
store float %0, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; The vector tests are really difficult to verify, since it can be hard to
|
||||
; predict how the scheduler will order the instructions. We already have
|
||||
; a test for the scalar case, so the vector tests just check that the
|
||||
; compiler doesn't crash.
|
||||
|
||||
; FUNC-LABEL: @v2f32
|
||||
; R600: CF_END
|
||||
define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
|
||||
entry:
|
||||
%0 = call <2 x float> @llvm.round.v2f32(<2 x float> %in)
|
||||
store <2 x float> %0, <2 x float> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; FUNC-LABEL: @v4f32
|
||||
; R600: CF_END
|
||||
define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
|
||||
entry:
|
||||
%0 = call <4 x float> @llvm.round.v4f32(<4 x float> %in)
|
||||
store <4 x float> %0, <4 x float> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.round.f32(float)
|
||||
declare <2 x float> @llvm.round.v2f32(<2 x float>)
|
||||
declare <4 x float> @llvm.round.v4f32(<4 x float>)
|
Loading…
x
Reference in New Issue
Block a user