mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-13 23:18:51 +00:00
Combine fcmp + select to fminnum / fmaxnum if no nans and legal
Also require unsafe FP math for now, since there isn't a way to test for signed zeros.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225744 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9136fdd9b7
commit
29ad7506e1
@ -4617,6 +4617,43 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
||||
/// \brief Generate Min/Max node
|
||||
static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
|
||||
SDValue True, SDValue False,
|
||||
ISD::CondCode CC, const TargetLowering &TLI,
|
||||
SelectionDAG &DAG) {
|
||||
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
|
||||
return SDValue();
|
||||
|
||||
switch (CC) {
|
||||
case ISD::SETOLT:
|
||||
case ISD::SETOLE:
|
||||
case ISD::SETLT:
|
||||
case ISD::SETLE:
|
||||
case ISD::SETULT:
|
||||
case ISD::SETULE: {
|
||||
unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
|
||||
if (TLI.isOperationLegal(Opcode, VT))
|
||||
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
|
||||
return SDValue();
|
||||
}
|
||||
case ISD::SETOGT:
|
||||
case ISD::SETOGE:
|
||||
case ISD::SETGT:
|
||||
case ISD::SETGE:
|
||||
case ISD::SETUGT:
|
||||
case ISD::SETUGE: {
|
||||
unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
|
||||
if (TLI.isOperationLegal(Opcode, VT))
|
||||
return DAG.getNode(Opcode, DL, VT, LHS, RHS);
|
||||
return SDValue();
|
||||
}
|
||||
default:
|
||||
return SDValue();
|
||||
}
|
||||
}
|
||||
|
||||
SDValue DAGCombiner::visitSELECT(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
@ -4696,6 +4733,28 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
|
||||
|
||||
// fold selects based on a setcc into other things, such as min/max/abs
|
||||
if (N0.getOpcode() == ISD::SETCC) {
|
||||
// select x, y (fcmp lt x, y) -> fminnum x, y
|
||||
// select x, y (fcmp gt x, y) -> fmaxnum x, y
|
||||
//
|
||||
// This is OK if we don't care about what happens if either operand is a
|
||||
// NaN.
|
||||
//
|
||||
|
||||
// FIXME: Instead of testing for UnsafeFPMath, this should be checking for
|
||||
// no signed zeros as well as no nans.
|
||||
const TargetOptions &Options = DAG.getTarget().Options;
|
||||
if (Options.UnsafeFPMath &&
|
||||
VT.isFloatingPoint() && N0.hasOneUse() &&
|
||||
DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
|
||||
|
||||
SDValue FMinMax =
|
||||
combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
|
||||
N1, N2, CC, TLI, DAG);
|
||||
if (FMinMax)
|
||||
return FMinMax;
|
||||
}
|
||||
|
||||
if ((!LegalOperations &&
|
||||
TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
|
||||
TLI.isOperationLegal(ISD::SELECT_CC, VT))
|
||||
|
@ -1,12 +1,17 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
|
||||
; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FIXME: Should replace unsafe-fp-math with no signed zeros.
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
|
||||
; FUNC-LABEL: @test_fmax_legacy_uge_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
|
||||
; EG: MAX
|
||||
define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
@ -25,7 +30,8 @@ define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(
|
||||
; FUNC-LABEL: @test_fmax_legacy_oge_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; EG: MAX
|
||||
define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
@ -44,7 +50,8 @@ define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(
|
||||
; FUNC-LABEL: @test_fmax_legacy_ugt_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; EG: MAX
|
||||
define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
@ -63,7 +70,8 @@ define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(
|
||||
; FUNC-LABEL: @test_fmax_legacy_ogt_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; EG: MAX
|
||||
define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
|
@ -1,11 +1,15 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FIXME: Should replace unsafe-fp-math with no signed zeros.
|
||||
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
|
||||
; FUNC-LABEL: @test_fmin_legacy_f32
|
||||
; EG: MIN *
|
||||
; SI: v_min_legacy_f32_e32
|
||||
; SI-SAFE: v_min_legacy_f32_e32
|
||||
; SI-NONAN: v_min_f32_e32
|
||||
define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = extractelement <4 x float> %reg0, i32 1
|
||||
@ -19,7 +23,8 @@ define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> in
|
||||
; FUNC-LABEL: @test_fmin_legacy_ule_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
@ -37,7 +42,8 @@ define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(
|
||||
; FUNC-LABEL: @test_fmin_legacy_ole_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
@ -55,7 +61,8 @@ define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(
|
||||
; FUNC-LABEL: @test_fmin_legacy_olt_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
|
||||
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
@ -73,7 +80,8 @@ define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(
|
||||
; FUNC-LABEL: @test_fmin_legacy_ult_f32
|
||||
; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
|
||||
define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() #1
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
|
Loading…
Reference in New Issue
Block a user