mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-14 03:50:48 +00:00
[SelectionDAG] remove constant folding limitations based on FP exceptions
We don't have FP exception limits in the IR constant folder for the binops (apart from strict ops), so it does not make sense to have them here in the DAG either. Nothing else in the backend tries to preserve exceptions (again outside of strict ops), so I don't see how this could have ever worked for real code that cares about FP exceptions.

There are still cases (examples: unary opcodes in SDAG, FMA in IR) where we are trying (at least partially) to preserve exceptions without even asking if the target supports FP exceptions. Those should be corrected in subsequent patches.

Real support for FP exceptions requires several changes to handle the constrained/strict FP ops.

Differential Revision: https://reviews.llvm.org/D61331

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359791 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3314e6578b
commit
14debde13b
@ -582,11 +582,6 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Return true if target supports floating point exceptions.
|
||||
bool hasFloatingPointExceptions() const {
|
||||
return HasFloatingPointExceptions;
|
||||
}
|
||||
|
||||
/// Return true if target always benefits from combining into FMA for a
|
||||
/// given value type. This must typically return false on targets where FMA
|
||||
/// takes more cycles to execute than FADD.
|
||||
@ -1915,12 +1910,6 @@ protected:
|
||||
/// control.
|
||||
void setJumpIsExpensive(bool isExpensive = true);
|
||||
|
||||
/// Tells the code generator that this target supports floating point
|
||||
/// exceptions and cares about preserving floating point exception behavior.
|
||||
void setHasFloatingPointExceptions(bool FPExceptions = true) {
|
||||
HasFloatingPointExceptions = FPExceptions;
|
||||
}
|
||||
|
||||
/// Tells the code generator which bitwidths to bypass.
|
||||
void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
|
||||
BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
|
||||
@ -2580,10 +2569,6 @@ private:
|
||||
/// predication.
|
||||
bool JumpIsExpensive;
|
||||
|
||||
/// Whether the target supports or cares about preserving floating point
|
||||
/// exception behavior.
|
||||
bool HasFloatingPointExceptions;
|
||||
|
||||
/// This target prefers to use _setjmp to implement llvm.setjmp.
|
||||
///
|
||||
/// Defaults to false.
|
||||
|
@ -4804,38 +4804,30 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
|
||||
|
||||
SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
|
||||
EVT VT, SDValue N1, SDValue N2) {
|
||||
// TODO: We don't do any constant folding for strict FP opcodes here, but we
|
||||
// should. That will require dealing with a potentially non-default
|
||||
// rounding mode, checking the "opStatus" return value from the APFloat
|
||||
// math calculations, and possibly other variations.
|
||||
auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
|
||||
auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
|
||||
bool HasFPExceptions = TLI->hasFloatingPointExceptions();
|
||||
if (N1CFP && N2CFP) {
|
||||
APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF();
|
||||
APFloat::opStatus Status;
|
||||
switch (Opcode) {
|
||||
case ISD::FADD:
|
||||
Status = C1.add(C2, APFloat::rmNearestTiesToEven);
|
||||
if (!HasFPExceptions || Status != APFloat::opInvalidOp)
|
||||
return getConstantFP(C1, DL, VT);
|
||||
break;
|
||||
C1.add(C2, APFloat::rmNearestTiesToEven);
|
||||
return getConstantFP(C1, DL, VT);
|
||||
case ISD::FSUB:
|
||||
Status = C1.subtract(C2, APFloat::rmNearestTiesToEven);
|
||||
if (!HasFPExceptions || Status != APFloat::opInvalidOp)
|
||||
return getConstantFP(C1, DL, VT);
|
||||
break;
|
||||
C1.subtract(C2, APFloat::rmNearestTiesToEven);
|
||||
return getConstantFP(C1, DL, VT);
|
||||
case ISD::FMUL:
|
||||
Status = C1.multiply(C2, APFloat::rmNearestTiesToEven);
|
||||
if (!HasFPExceptions || Status != APFloat::opInvalidOp)
|
||||
return getConstantFP(C1, DL, VT);
|
||||
break;
|
||||
C1.multiply(C2, APFloat::rmNearestTiesToEven);
|
||||
return getConstantFP(C1, DL, VT);
|
||||
case ISD::FDIV:
|
||||
Status = C1.divide(C2, APFloat::rmNearestTiesToEven);
|
||||
if (!HasFPExceptions || Status != APFloat::opInvalidOp)
|
||||
return getConstantFP(C1, DL, VT);
|
||||
break;
|
||||
C1.divide(C2, APFloat::rmNearestTiesToEven);
|
||||
return getConstantFP(C1, DL, VT);
|
||||
case ISD::FREM:
|
||||
Status = C1.mod(C2);
|
||||
if (!HasFPExceptions || Status != APFloat::opInvalidOp)
|
||||
return getConstantFP(C1, DL, VT);
|
||||
break;
|
||||
C1.mod(C2);
|
||||
return getConstantFP(C1, DL, VT);
|
||||
case ISD::FCOPYSIGN:
|
||||
C1.copySign(C2);
|
||||
return getConstantFP(C1, DL, VT);
|
||||
@ -5311,10 +5303,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
|
||||
APFloat V1 = N1CFP->getValueAPF();
|
||||
const APFloat &V2 = N2CFP->getValueAPF();
|
||||
const APFloat &V3 = N3CFP->getValueAPF();
|
||||
APFloat::opStatus s =
|
||||
V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
|
||||
if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
|
||||
return getConstantFP(V1, DL, VT);
|
||||
V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
|
||||
return getConstantFP(V1, DL, VT);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -545,7 +545,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
|
||||
JumpIsExpensive = JumpIsExpensiveOverride;
|
||||
PredictableSelectIsExpensive = false;
|
||||
EnableExtLdPromotion = false;
|
||||
HasFloatingPointExceptions = true;
|
||||
StackPointerRegisterToSaveRestore = 0;
|
||||
BooleanContents = UndefinedBooleanContent;
|
||||
BooleanFloatContents = UndefinedBooleanContent;
|
||||
|
@ -729,11 +729,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setTargetDAGCombine(ISD::ATOMIC_LOAD_FADD);
|
||||
|
||||
setSchedulingPreference(Sched::RegPressure);
|
||||
|
||||
// SI at least has hardware support for floating point exceptions, but no way
|
||||
// of using or handling them is implemented. They are also optional in OpenCL
|
||||
// (Section 7.3)
|
||||
setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
|
||||
}
|
||||
|
||||
const GCNSubtarget *SITargetLowering::getSubtarget() const {
|
||||
|
@ -45,9 +45,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
|
||||
setBooleanContents(ZeroOrOneBooleanContent);
|
||||
// Except in SIMD vectors
|
||||
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
|
||||
// WebAssembly does not produce floating-point exceptions on normal floating
|
||||
// point operations.
|
||||
setHasFloatingPointExceptions(false);
|
||||
// We don't know the microarchitecture here, so just reduce register pressure.
|
||||
setSchedulingPreference(Sched::RegPressure);
|
||||
// Tell ISel that we have a stack pointer.
|
||||
|
@ -18,10 +18,9 @@ define double @constant_fold_fdiv_by_zero(double* %p) {
|
||||
define double @constant_fold_frem_by_zero(double* %p) {
|
||||
; CHECK-LABEL: constant_fold_frem_by_zero:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #1
|
||||
; CHECK-NEXT: fmov d1, xzr
|
||||
; CHECK-NEXT: mov x8, #9221120237041090560
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: b fmod
|
||||
; CHECK-NEXT: ret
|
||||
%r = frem double 4.940660e-324, 0.0
|
||||
ret double %r
|
||||
}
|
||||
@ -31,10 +30,8 @@ define double @constant_fold_frem_by_zero(double* %p) {
|
||||
define double @constant_fold_fmul_nan(double* %p) {
|
||||
; CHECK-LABEL: constant_fold_fmul_nan:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #9218868437227405312
|
||||
; CHECK-NEXT: fmov d0, xzr
|
||||
; CHECK-NEXT: fmov d1, x8
|
||||
; CHECK-NEXT: fmul d0, d1, d0
|
||||
; CHECK-NEXT: mov x8, #9221120237041090560
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: ret
|
||||
%r = fmul double 0x7ff0000000000000, 0.0
|
||||
ret double %r
|
||||
@ -45,11 +42,8 @@ define double @constant_fold_fmul_nan(double* %p) {
|
||||
define double @constant_fold_fadd_nan(double* %p) {
|
||||
; CHECK-LABEL: constant_fold_fadd_nan:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #-4503599627370496
|
||||
; CHECK-NEXT: mov x9, #9218868437227405312
|
||||
; CHECK-NEXT: mov x8, #9221120237041090560
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: fmov d1, x9
|
||||
; CHECK-NEXT: fadd d0, d1, d0
|
||||
; CHECK-NEXT: ret
|
||||
%r = fadd double 0x7ff0000000000000, 0xfff0000000000000
|
||||
ret double %r
|
||||
@ -60,9 +54,8 @@ define double @constant_fold_fadd_nan(double* %p) {
|
||||
define double @constant_fold_fsub_nan(double* %p) {
|
||||
; CHECK-LABEL: constant_fold_fsub_nan:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #9218868437227405312
|
||||
; CHECK-NEXT: mov x8, #9221120237041090560
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: fsub d0, d0, d0
|
||||
; CHECK-NEXT: ret
|
||||
%r = fsub double 0x7ff0000000000000, 0x7ff0000000000000
|
||||
ret double %r
|
||||
@ -73,12 +66,8 @@ define double @constant_fold_fsub_nan(double* %p) {
|
||||
define double @constant_fold_fma_nan(double* %p) {
|
||||
; CHECK-LABEL: constant_fold_fma_nan:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, #4631107791820423168
|
||||
; CHECK-NEXT: mov x9, #9218868437227405312
|
||||
; CHECK-NEXT: fmov d0, xzr
|
||||
; CHECK-NEXT: fmov d1, x8
|
||||
; CHECK-NEXT: fmov d2, x9
|
||||
; CHECK-NEXT: fmadd d0, d2, d0, d1
|
||||
; CHECK-NEXT: mov x8, #9221120237041090560
|
||||
; CHECK-NEXT: fmov d0, x8
|
||||
; CHECK-NEXT: ret
|
||||
%r = call double @llvm.fma.f64(double 0x7ff0000000000000, double 0.0, double 42.0)
|
||||
ret double %r
|
||||
|
Loading…
x
Reference in New Issue
Block a user