mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-02 18:42:36 +00:00
[AArch64] Match fminnum/fmaxnum for vector fminnm/fmaxnm instead of an intrinsic.
Lower Intrinsic::aarch64_neon_fminnm/fmaxnm to fminnum/fmaxnum and match that instead. Minimal functional change: - Extra tests added because coverage of scalar fminnm/fmaxnm instructions was nonexistent. - f16 test updated because, now that we actually generate scalar fminnm/fmaxnm, we no longer need to bail out to a libcall! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244595 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
42fd74f775
commit
3b4cd46ad5
@ -389,6 +389,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::FRINT, Ty, Legal);
|
||||
setOperationAction(ISD::FTRUNC, Ty, Legal);
|
||||
setOperationAction(ISD::FROUND, Ty, Legal);
|
||||
setOperationAction(ISD::FMINNUM, Ty, Legal);
|
||||
setOperationAction(ISD::FMAXNUM, Ty, Legal);
|
||||
}
|
||||
|
||||
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
|
||||
@ -679,9 +681,10 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
|
||||
ISD::SABSDIFF, ISD::UABSDIFF})
|
||||
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
|
||||
|
||||
// F[MIN|MAX]NAN are available for all FP NEON types.
|
||||
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
|
||||
if (VT.isFloatingPoint())
|
||||
for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN})
|
||||
for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
|
||||
ISD::FMINNUM, ISD::FMAXNUM})
|
||||
setOperationAction(Opcode, VT.getSimpleVT(), Legal);
|
||||
|
||||
if (Subtarget->isLittleEndian()) {
|
||||
@ -8233,6 +8236,12 @@ static SDValue performIntrinsicCombine(SDNode *N,
|
||||
case Intrinsic::aarch64_neon_uabd:
|
||||
return DAG.getNode(ISD::UABSDIFF, SDLoc(N), N->getValueType(0),
|
||||
N->getOperand(1), N->getOperand(2));
|
||||
case Intrinsic::aarch64_neon_fmaxnm:
|
||||
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
|
||||
N->getOperand(1), N->getOperand(2));
|
||||
case Intrinsic::aarch64_neon_fminnm:
|
||||
return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
|
||||
N->getOperand(1), N->getOperand(2));
|
||||
case Intrinsic::aarch64_neon_smull:
|
||||
case Intrinsic::aarch64_neon_umull:
|
||||
case Intrinsic::aarch64_neon_pmull:
|
||||
|
@ -2502,9 +2502,9 @@ defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>;
|
||||
let SchedRW = [WriteFDiv] in {
|
||||
defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
|
||||
}
|
||||
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>;
|
||||
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
|
||||
defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>;
|
||||
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>;
|
||||
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
|
||||
defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>;
|
||||
let SchedRW = [WriteFMul] in {
|
||||
defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
|
||||
@ -2516,9 +2516,9 @@ def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
|
||||
(FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
|
||||
def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
|
||||
(FMINDrr FPR64:$Rn, FPR64:$Rm)>;
|
||||
def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
|
||||
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
|
||||
(FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
|
||||
def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
|
||||
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
|
||||
(FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -2804,11 +2804,11 @@ defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
|
||||
defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
|
||||
defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
|
||||
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
|
||||
defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>;
|
||||
defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", fmaxnum>;
|
||||
defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>;
|
||||
defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", fmaxnan>;
|
||||
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>;
|
||||
defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>;
|
||||
defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", fminnum>;
|
||||
defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>;
|
||||
defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", fminnan>;
|
||||
|
||||
|
@ -42,13 +42,28 @@ define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp
|
||||
ret <2 x double> %vminnm2.i
|
||||
}
|
||||
|
||||
define float @f7(float %a, float %b) nounwind readnone ssp {
|
||||
; CHECK: fmaxnm s0, s0, s1
|
||||
; CHECK: ret
|
||||
%vmaxnm2.i = tail call float @llvm.aarch64.neon.fmaxnm.f32(float %a, float %b) nounwind
|
||||
ret float %vmaxnm2.i
|
||||
}
|
||||
|
||||
define double @f8(double %a, double %b) nounwind readnone ssp {
|
||||
; CHECK: fminnm d0, d0, d1
|
||||
; CHECK: ret
|
||||
%vmaxnm2.i = tail call double @llvm.aarch64.neon.fminnm.f64(double %a, double %b) nounwind
|
||||
ret double %vmaxnm2.i
|
||||
}
|
||||
|
||||
declare <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
|
||||
declare <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
declare <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone
|
||||
declare <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||
|
||||
declare float @llvm.aarch64.neon.fmaxnm.f32(float, float) nounwind readnone
|
||||
declare double @llvm.aarch64.neon.fminnm.f64(double, double) nounwind readnone
|
||||
|
||||
define double @test_fmaxnmv(<2 x double> %in) {
|
||||
; CHECK-LABEL: test_fmaxnmv:
|
||||
|
@ -644,13 +644,10 @@ define half @test_fabs(half %a) #0 {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_minnum:
|
||||
; CHECK-NEXT: stp x29, x30, [sp, #-16]!
|
||||
; CHECK-NEXT: mov x29, sp
|
||||
; CHECK-NEXT: fcvt s0, h0
|
||||
; CHECK-NEXT: fcvt s1, h1
|
||||
; CHECK-NEXT: bl {{_?}}fminf
|
||||
; CHECK-NEXT: fcvt s0, h0
|
||||
; CHECK-NEXT: fminnm s0, s0, s1
|
||||
; CHECK-NEXT: fcvt h0, s0
|
||||
; CHECK-NEXT: ldp x29, x30, [sp], #16
|
||||
; CHECK-NEXT: ret
|
||||
define half @test_minnum(half %a, half %b) #0 {
|
||||
%r = call half @llvm.minnum.f16(half %a, half %b)
|
||||
@ -658,13 +655,10 @@ define half @test_minnum(half %a, half %b) #0 {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test_maxnum:
|
||||
; CHECK-NEXT: stp x29, x30, [sp, #-16]!
|
||||
; CHECK-NEXT: mov x29, sp
|
||||
; CHECK-NEXT: fcvt s0, h0
|
||||
; CHECK-NEXT: fcvt s1, h1
|
||||
; CHECK-NEXT: bl {{_?}}fmaxf
|
||||
; CHECK-NEXT: fcvt s0, h0
|
||||
; CHECK-NEXT: fmaxnm s0, s0, s1
|
||||
; CHECK-NEXT: fcvt h0, s0
|
||||
; CHECK-NEXT: ldp x29, x30, [sp], #16
|
||||
; CHECK-NEXT: ret
|
||||
define half @test_maxnum(half %a, half %b) #0 {
|
||||
%r = call half @llvm.maxnum.f16(half %a, half %b)
|
||||
|
Loading…
x
Reference in New Issue
Block a user