mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-11 18:36:55 +00:00
Make IEEEFloat::roundToIntegral more standard conformant
The behavior of IEEEFloat::roundToIntegral is aligned with the IEEE-754 operation roundToIntegralExact. In particular, this function now: - returns opInvalidOp for signaling NaNs, - returns opInexact if the result of rounding differs from the argument. Differential Revision: https://reviews.llvm.org/D75246
This commit is contained in:
parent
c0ad75e758
commit
14a1b80e04
@ -1977,14 +1977,59 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
|
||||
return fs;
|
||||
}
|
||||
|
||||
/* Rounding-mode corrrect round to integral value. */
|
||||
/* Rounding-mode correct round to integral value. */
|
||||
IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
|
||||
opStatus fs;
|
||||
|
||||
if (isInfinity())
|
||||
// [IEEE Std 754-2008 6.1]:
|
||||
// The behavior of infinity in floating-point arithmetic is derived from the
|
||||
// limiting cases of real arithmetic with operands of arbitrarily
|
||||
// large magnitude, when such a limit exists.
|
||||
// ...
|
||||
// Operations on infinite operands are usually exact and therefore signal no
|
||||
// exceptions ...
|
||||
return opOK;
|
||||
|
||||
if (isNaN()) {
|
||||
if (isSignaling()) {
|
||||
// [IEEE Std 754-2008 6.2]:
|
||||
// Under default exception handling, any operation signaling an invalid
|
||||
// operation exception and for which a floating-point result is to be
|
||||
// delivered shall deliver a quiet NaN.
|
||||
makeQuiet();
|
||||
// [IEEE Std 754-2008 6.2]:
|
||||
// Signaling NaNs shall be reserved operands that, under default exception
|
||||
// handling, signal the invalid operation exception(see 7.2) for every
|
||||
// general-computational and signaling-computational operation except for
|
||||
// the conversions described in 5.12.
|
||||
return opInvalidOp;
|
||||
} else {
|
||||
// [IEEE Std 754-2008 6.2]:
|
||||
// For an operation with quiet NaN inputs, other than maximum and minimum
|
||||
// operations, if a floating-point result is to be delivered the result
|
||||
// shall be a quiet NaN which should be one of the input NaNs.
|
||||
// ...
|
||||
// Every general-computational and quiet-computational operation involving
|
||||
// one or more input NaNs, none of them signaling, shall signal no
|
||||
// exception, except fusedMultiplyAdd might signal the invalid operation
|
||||
// exception(see 7.2).
|
||||
return opOK;
|
||||
}
|
||||
}
|
||||
|
||||
if (isZero()) {
|
||||
// [IEEE Std 754-2008 6.3]:
|
||||
// ... the sign of the result of conversions, the quantize operation, the
|
||||
// roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
|
||||
// the sign of the first or only operand.
|
||||
return opOK;
|
||||
}
|
||||
|
||||
// If the exponent is large enough, we know that this value is already
|
||||
// integral, and the arithmetic below would potentially cause it to saturate
|
||||
// to +/-Inf. Bail out early instead.
|
||||
if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
|
||||
if (exponent+1 >= (int)semanticsPrecision(*semantics))
|
||||
return opOK;
|
||||
|
||||
// The algorithm here is quite simple: we add 2^(p-1), where p is the
|
||||
@ -1998,19 +2043,18 @@ IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
|
||||
IEEEFloat MagicConstant(*semantics);
|
||||
fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
|
||||
rmNearestTiesToEven);
|
||||
assert(fs == opOK);
|
||||
MagicConstant.sign = sign;
|
||||
|
||||
if (fs != opOK)
|
||||
return fs;
|
||||
|
||||
// Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
|
||||
// Preserve the input sign so that we can handle the case of zero result
|
||||
// correctly.
|
||||
bool inputSign = isNegative();
|
||||
|
||||
fs = add(MagicConstant, rounding_mode);
|
||||
if (fs != opOK && fs != opInexact)
|
||||
return fs;
|
||||
|
||||
fs = subtract(MagicConstant, rounding_mode);
|
||||
// Current value and 'MagicConstant' are both integers, so the result of the
|
||||
// subtraction is always exact according to Sterbenz' lemma.
|
||||
subtract(MagicConstant, rounding_mode);
|
||||
|
||||
// Restore the input sign.
|
||||
if (inputSign != isNegative())
|
||||
|
@ -1525,6 +1525,124 @@ TEST(APFloatTest, roundToIntegral) {
|
||||
P = APFloat::getInf(APFloat::IEEEdouble(), true);
|
||||
P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_TRUE(std::isinf(P.convertToDouble()) && P.convertToDouble() < 0.0);
|
||||
|
||||
APFloat::opStatus St;
|
||||
|
||||
P = APFloat::getNaN(APFloat::IEEEdouble());
|
||||
St = P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_TRUE(P.isNaN());
|
||||
EXPECT_FALSE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opOK, St);
|
||||
|
||||
P = APFloat::getNaN(APFloat::IEEEdouble(), true);
|
||||
St = P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_TRUE(P.isNaN());
|
||||
EXPECT_TRUE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opOK, St);
|
||||
|
||||
P = APFloat::getSNaN(APFloat::IEEEdouble());
|
||||
St = P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_TRUE(P.isNaN());
|
||||
EXPECT_FALSE(P.isSignaling());
|
||||
EXPECT_FALSE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opInvalidOp, St);
|
||||
|
||||
P = APFloat::getSNaN(APFloat::IEEEdouble(), true);
|
||||
St = P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_TRUE(P.isNaN());
|
||||
EXPECT_FALSE(P.isSignaling());
|
||||
EXPECT_TRUE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opInvalidOp, St);
|
||||
|
||||
P = APFloat::getInf(APFloat::IEEEdouble());
|
||||
St = P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_TRUE(P.isInfinity());
|
||||
EXPECT_FALSE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opOK, St);
|
||||
|
||||
P = APFloat::getInf(APFloat::IEEEdouble(), true);
|
||||
St = P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_TRUE(P.isInfinity());
|
||||
EXPECT_TRUE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opOK, St);
|
||||
|
||||
P = APFloat::getZero(APFloat::IEEEdouble(), false);
|
||||
St = P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_TRUE(P.isZero());
|
||||
EXPECT_FALSE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opOK, St);
|
||||
|
||||
P = APFloat::getZero(APFloat::IEEEdouble(), false);
|
||||
St = P.roundToIntegral(APFloat::rmTowardNegative);
|
||||
EXPECT_TRUE(P.isZero());
|
||||
EXPECT_FALSE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opOK, St);
|
||||
|
||||
P = APFloat::getZero(APFloat::IEEEdouble(), true);
|
||||
St = P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_TRUE(P.isZero());
|
||||
EXPECT_TRUE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opOK, St);
|
||||
|
||||
P = APFloat::getZero(APFloat::IEEEdouble(), true);
|
||||
St = P.roundToIntegral(APFloat::rmTowardNegative);
|
||||
EXPECT_TRUE(P.isZero());
|
||||
EXPECT_TRUE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opOK, St);
|
||||
|
||||
P = APFloat(1E-100);
|
||||
St = P.roundToIntegral(APFloat::rmTowardNegative);
|
||||
EXPECT_TRUE(P.isZero());
|
||||
EXPECT_FALSE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opInexact, St);
|
||||
|
||||
P = APFloat(1E-100);
|
||||
St = P.roundToIntegral(APFloat::rmTowardPositive);
|
||||
EXPECT_EQ(1.0, P.convertToDouble());
|
||||
EXPECT_FALSE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opInexact, St);
|
||||
|
||||
P = APFloat(-1E-100);
|
||||
St = P.roundToIntegral(APFloat::rmTowardNegative);
|
||||
EXPECT_TRUE(P.isNegative());
|
||||
EXPECT_EQ(-1.0, P.convertToDouble());
|
||||
EXPECT_EQ(APFloat::opInexact, St);
|
||||
|
||||
P = APFloat(-1E-100);
|
||||
St = P.roundToIntegral(APFloat::rmTowardPositive);
|
||||
EXPECT_TRUE(P.isZero());
|
||||
EXPECT_TRUE(P.isNegative());
|
||||
EXPECT_EQ(APFloat::opInexact, St);
|
||||
|
||||
P = APFloat(10.0);
|
||||
St = P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_EQ(10.0, P.convertToDouble());
|
||||
EXPECT_EQ(APFloat::opOK, St);
|
||||
|
||||
P = APFloat(10.5);
|
||||
St = P.roundToIntegral(APFloat::rmTowardZero);
|
||||
EXPECT_EQ(10.0, P.convertToDouble());
|
||||
EXPECT_EQ(APFloat::opInexact, St);
|
||||
|
||||
P = APFloat(10.5);
|
||||
St = P.roundToIntegral(APFloat::rmTowardPositive);
|
||||
EXPECT_EQ(11.0, P.convertToDouble());
|
||||
EXPECT_EQ(APFloat::opInexact, St);
|
||||
|
||||
P = APFloat(10.5);
|
||||
St = P.roundToIntegral(APFloat::rmTowardNegative);
|
||||
EXPECT_EQ(10.0, P.convertToDouble());
|
||||
EXPECT_EQ(APFloat::opInexact, St);
|
||||
|
||||
P = APFloat(10.5);
|
||||
St = P.roundToIntegral(APFloat::rmNearestTiesToAway);
|
||||
EXPECT_EQ(11.0, P.convertToDouble());
|
||||
EXPECT_EQ(APFloat::opInexact, St);
|
||||
|
||||
P = APFloat(10.5);
|
||||
St = P.roundToIntegral(APFloat::rmNearestTiesToEven);
|
||||
EXPECT_EQ(10.0, P.convertToDouble());
|
||||
EXPECT_EQ(APFloat::opInexact, St);
|
||||
}
|
||||
|
||||
TEST(APFloatTest, isInteger) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user