mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-07 08:34:59 +00:00
AMDGPU: Fold out sign bit ops on frexp_exp
The sign bit has no impact on the exponent, so strip sign-bit operations (fneg, fabs, copysign) from the frexp_exp source operand. This saves on the source modifier encoding cost. I left the GlobalISel handling until there's a resolution to issue #62628. We should do this in instcombine too, but legalization should be introducing more frexps than it currently does, and those are the cases where this fold would apply.
This commit is contained in:
parent
90b83a6d6c
commit
8ee1cc82c9
@ -1496,6 +1496,17 @@ static SDValue peekFNeg(SDValue Val) {
|
||||
|
||||
return Val;
|
||||
}
|
||||
|
||||
/// Look through floating-point sign-bit operations wrapped around \p Val.
///
/// Peels at most one FNEG, then at most one FABS, then at most one FCOPYSIGN,
/// in that outer-to-inner order, continuing each time with operand 0 of the
/// peeled node. Returns \p Val itself when none of them match.
static SDValue peekFPSignOps(SDValue Val) {
  // The order is significant: each opcode is tested exactly once, against
  // whatever value is left after the previous step.
  static const unsigned SignOpcodes[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};

  for (unsigned Opc : SignOpcodes) {
    if (Val.getOpcode() != Opc)
      continue;
    Val = Val.getOperand(0);
  }

  return Val;
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
|
||||
const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
|
||||
SDValue False, SDValue CC, DAGCombinerInfo &DCI) const {
|
||||
@ -3664,6 +3675,17 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
|
||||
SDValue Src = N->getOperand(1);
|
||||
return Src.isUndef() ? Src : SDValue();
|
||||
}
|
||||
case Intrinsic::amdgcn_frexp_exp: {
|
||||
// frexp_exp (fneg x) -> frexp_exp x
|
||||
// frexp_exp (fabs x) -> frexp_exp x
|
||||
// frexp_exp (fneg (fabs x)) -> frexp_exp x
|
||||
SDValue Src = N->getOperand(1);
|
||||
SDValue PeekSign = peekFPSignOps(Src);
|
||||
if (PeekSign == Src)
|
||||
return SDValue();
|
||||
return SDValue(DCI.DAG.UpdateNodeOperands(N, N->getOperand(0), PeekSign),
|
||||
0);
|
||||
}
|
||||
default:
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare float @llvm.fabs.f32(float) #0
|
||||
declare float @llvm.copysign.f32(float, float) #0
|
||||
declare double @llvm.fabs.f64(double) #0
|
||||
declare i32 @llvm.amdgcn.frexp.exp.i32.f32(float) #0
|
||||
declare i32 @llvm.amdgcn.frexp.exp.i32.f64(double) #0
|
||||
@ -15,7 +16,7 @@ define amdgpu_kernel void @s_test_frexp_exp_f32(ptr addrspace(1) %out, float %sr
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f32:
|
||||
; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, |{{s[0-9]+}}|
|
||||
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
|
||||
define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
|
||||
%fabs.src = call float @llvm.fabs.f32(float %src)
|
||||
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fabs.src)
|
||||
@ -24,7 +25,7 @@ define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, floa
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f32:
|
||||
; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, -|{{s[0-9]+}}|
|
||||
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
|
||||
define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
|
||||
%fabs.src = call float @llvm.fabs.f32(float %src)
|
||||
%fneg.fabs.src = fneg float %fabs.src
|
||||
@ -33,6 +34,15 @@ define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out,
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}s_test_copysign_frexp_exp_f32:
|
||||
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
|
||||
; copysign only rewrites the sign bit, which has no impact on the exponent, so
; the expected instruction checked above reads a plain scalar register with no
; source modifier.
define amdgpu_kernel void @s_test_copysign_frexp_exp_f32(ptr addrspace(1) %out, float %src, float %sign) #1 {
|
||||
%copysign = call float @llvm.copysign.f32(float %src, float %sign)
|
||||
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %copysign)
|
||||
store i32 %frexp.exp, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}s_test_frexp_exp_f64:
|
||||
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
|
||||
define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
|
||||
@ -42,7 +52,7 @@ define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %s
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f64:
|
||||
; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, |{{s\[[0-9]+:[0-9]+\]}}|
|
||||
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
|
||||
define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
|
||||
%fabs.src = call double @llvm.fabs.f64(double %src)
|
||||
%frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fabs.src)
|
||||
@ -51,7 +61,7 @@ define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, doub
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f64:
|
||||
; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, -|{{s\[[0-9]+:[0-9]+\]}}|
|
||||
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
|
||||
define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
|
||||
%fabs.src = call double @llvm.fabs.f64(double %src)
|
||||
%fneg.fabs.src = fneg double %fabs.src
|
||||
|
Loading…
Reference in New Issue
Block a user