AMDGPU: Fold out sign bit ops on frexp_exp

The sign bit has no impact on the exponent, so strip these away. Saves
on the source modifier encoding cost. I left the GlobalISel handling
until there's a resolution to issue #62628.

We should do this in instcombine too, but legalization should be
introducing more frexps than it currently does, which is where this would occur.
This commit is contained in:
Matt Arsenault 2023-07-03 10:38:04 -04:00
parent 90b83a6d6c
commit 8ee1cc82c9
2 changed files with 36 additions and 4 deletions

View File

@ -1496,6 +1496,17 @@ static SDValue peekFNeg(SDValue Val) {
return Val;
}
/// Look through operations that only manipulate the sign bit of \p Val and
/// return the value underneath them.
///
/// The opcodes are tried once each, in the fixed order FNEG, FABS, FCOPYSIGN;
/// whenever the current value's opcode matches, it is replaced by its
/// operand 0. If nothing matches, \p Val is returned unchanged.
static SDValue peekFPSignOps(SDValue Val) {
  // Order matters: each opcode gets exactly one chance to match, so this
  // peels at most one of each, outermost first.
  const unsigned SignOpcodes[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
  for (unsigned Opcode : SignOpcodes) {
    if (Val.getOpcode() != Opcode)
      continue;
    Val = Val.getOperand(0);
  }
  return Val;
}
SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
SDValue False, SDValue CC, DAGCombinerInfo &DCI) const {
@ -3664,6 +3675,17 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
SDValue Src = N->getOperand(1);
return Src.isUndef() ? Src : SDValue();
}
case Intrinsic::amdgcn_frexp_exp: {
// frexp_exp (fneg x) -> frexp_exp x
// frexp_exp (fabs x) -> frexp_exp x
// frexp_exp (fneg (fabs x)) -> frexp_exp x
// frexp_exp (fcopysign x, y) -> frexp_exp x
SDValue Src = N->getOperand(1);
SDValue PeekSign = peekFPSignOps(Src);
if (PeekSign == Src)
return SDValue();
return SDValue(DCI.DAG.UpdateNodeOperands(N, N->getOperand(0), PeekSign),
0);
}
default:
return SDValue();
}

View File

@ -2,6 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare float @llvm.fabs.f32(float) #0
declare float @llvm.copysign.f32(float, float) #0
declare double @llvm.fabs.f64(double) #0
declare i32 @llvm.amdgcn.frexp.exp.i32.f32(float) #0
declare i32 @llvm.amdgcn.frexp.exp.i32.f64(double) #0
@ -15,7 +16,7 @@ define amdgpu_kernel void @s_test_frexp_exp_f32(ptr addrspace(1) %out, float %sr
}
; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f32:
; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, |{{s[0-9]+}}|
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
%fabs.src = call float @llvm.fabs.f32(float %src)
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fabs.src)
@ -24,7 +25,7 @@ define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, floa
}
; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f32:
; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, -|{{s[0-9]+}}|
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
%fabs.src = call float @llvm.fabs.f32(float %src)
%fneg.fabs.src = fneg float %fabs.src
@ -33,6 +34,15 @@ define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out,
ret void
}
; copysign only rewrites the sign bit of %src, so frexp_exp is expected to be
; selected in its e32 form reading a plain SGPR source with no source modifiers.
; GCN-LABEL: {{^}}s_test_copysign_frexp_exp_f32:
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
define amdgpu_kernel void @s_test_copysign_frexp_exp_f32(ptr addrspace(1) %out, float %src, float %sign) #1 {
%copysign = call float @llvm.copysign.f32(float %src, float %sign)
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %copysign)
store i32 %frexp.exp, ptr addrspace(1) %out
ret void
}
; GCN-LABEL: {{^}}s_test_frexp_exp_f64:
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
@ -42,7 +52,7 @@ define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %s
}
; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f64:
; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, |{{s\[[0-9]+:[0-9]+\]}}|
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
%fabs.src = call double @llvm.fabs.f64(double %src)
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fabs.src)
@ -51,7 +61,7 @@ define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, doub
}
; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f64:
; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, -|{{s\[[0-9]+:[0-9]+\]}}|
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
%fabs.src = call double @llvm.fabs.f64(double %src)
%fneg.fabs.src = fneg double %fabs.src