diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 2f7a2763639207..ffc5d2d4cd8fc3 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -899,6 +899,8 @@ void MicrosoftCXXNameMangler::mangleFloat(llvm::APFloat Number) { case APFloat::S_Float8E4M3FNUZ: case APFloat::S_Float8E4M3B11FNUZ: case APFloat::S_FloatTF32: + case APFloat::S_Float6E3M2FN: + case APFloat::S_Float6E2M3FN: llvm_unreachable("Tried to mangle unexpected APFloat semantics"); } diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index 78faadb30d9eb5..a9bb6cc9999b1e 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -189,6 +189,14 @@ struct APFloatBase { // improved range compared to half (16-bit) formats, at (potentially) // greater throughput than single precision (32-bit) formats. S_FloatTF32, + // 6-bit floating point number with bit layout S1E3M2. Unlike IEEE-754 + // types, there are no infinity or NaN values. The format is detailed in + // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf + S_Float6E3M2FN, + // 6-bit floating point number with bit layout S1E2M3. Unlike IEEE-754 + // types, there are no infinity or NaN values. 
The format is detailed in + // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf + S_Float6E2M3FN, S_x87DoubleExtended, S_MaxSemantics = S_x87DoubleExtended, @@ -209,6 +217,8 @@ struct APFloatBase { static const fltSemantics &Float8E4M3FNUZ() LLVM_READNONE; static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE; static const fltSemantics &FloatTF32() LLVM_READNONE; + static const fltSemantics &Float6E3M2FN() LLVM_READNONE; + static const fltSemantics &Float6E2M3FN() LLVM_READNONE; static const fltSemantics &x87DoubleExtended() LLVM_READNONE; /// A Pseudo fltsemantic used to construct APFloats that cannot conflict with @@ -627,6 +637,8 @@ class IEEEFloat final : public APFloatBase { APInt convertFloat8E4M3FNUZAPFloatToAPInt() const; APInt convertFloat8E4M3B11FNUZAPFloatToAPInt() const; APInt convertFloatTF32APFloatToAPInt() const; + APInt convertFloat6E3M2FNAPFloatToAPInt() const; + APInt convertFloat6E2M3FNAPFloatToAPInt() const; void initFromAPInt(const fltSemantics *Sem, const APInt &api); template <const fltSemantics &S> void initFromIEEEAPInt(const APInt &api); void initFromHalfAPInt(const APInt &api); @@ -642,6 +654,8 @@ class IEEEFloat final : public APFloatBase { void initFromFloat8E4M3FNUZAPInt(const APInt &api); void initFromFloat8E4M3B11FNUZAPInt(const APInt &api); void initFromFloatTF32APInt(const APInt &api); + void initFromFloat6E3M2FNAPInt(const APInt &api); + void initFromFloat6E2M3FNAPInt(const APInt &api); void assign(const IEEEFloat &); void copySignificand(const IEEEFloat &); @@ -1046,6 +1060,17 @@ class APFloat : public APFloatBase { /// \param Semantics - type float semantics static APFloat getAllOnesValue(const fltSemantics &Semantics); + static bool hasNanOrInf(const fltSemantics &Sem) { + switch (SemanticsToEnum(Sem)) { + default: + return true; + // Below Semantics do not support {NaN or Inf} + case APFloat::S_Float6E3M2FN: + case APFloat::S_Float6E2M3FN: + return false; + } + } + /// Used to insert APFloat objects, or 
objects that contain APFloat objects, /// into FoldingSets. void Profile(FoldingSetNodeID &NID) const; diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 283fcc153b33aa..1209bf71a287d7 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -68,6 +68,10 @@ enum class fltNonfiniteBehavior { // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available // encodings do not distinguish between signalling and quiet NaN. NanOnly, + + // This behavior is present in Float6E3M2FN and Float6E2M3FN types, + // which do not support Inf or NaN values. + FiniteOnly, }; // How NaN values are represented. This is curently only used in combination @@ -139,6 +143,10 @@ static constexpr fltSemantics semFloat8E4M3FNUZ = { static constexpr fltSemantics semFloat8E4M3B11FNUZ = { 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero}; static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19}; +static constexpr fltSemantics semFloat6E3M2FN = { + 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly}; +static constexpr fltSemantics semFloat6E2M3FN = { + 2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly}; static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; static constexpr fltSemantics semBogus = {0, 0, 0, 0}; @@ -206,6 +214,10 @@ const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { return Float8E4M3B11FNUZ(); case S_FloatTF32: return FloatTF32(); + case S_Float6E3M2FN: + return Float6E3M2FN(); + case S_Float6E2M3FN: + return Float6E2M3FN(); case S_x87DoubleExtended: return x87DoubleExtended(); } @@ -238,6 +250,10 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { return S_Float8E4M3B11FNUZ; else if (&Sem == &llvm::APFloat::FloatTF32()) return S_FloatTF32; + else if (&Sem == &llvm::APFloat::Float6E3M2FN()) + return S_Float6E3M2FN; + else if (&Sem == &llvm::APFloat::Float6E2M3FN()) + return S_Float6E2M3FN; else if (&Sem == &llvm::APFloat::x87DoubleExtended()) 
return S_x87DoubleExtended; else @@ -260,6 +276,8 @@ const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { return semFloat8E4M3B11FNUZ; } const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } +const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } +const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } const fltSemantics &APFloatBase::x87DoubleExtended() { return semX87DoubleExtended; } @@ -878,6 +896,9 @@ void IEEEFloat::copySignificand(const IEEEFloat &rhs) { for the significand. If double or longer, this is a signalling NaN, which may not be ideal. If float, this is QNaN(0). */ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) { + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) + llvm_unreachable("This floating point format does not support NaN"); + category = fcNaN; sign = Negative; exponent = exponentNaN(); @@ -1499,16 +1520,18 @@ static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts, /* Handle overflow. Sign is preserved. We either become infinity or the largest finite number. */ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) { - /* Infinity? */ - if (rounding_mode == rmNearestTiesToEven || - rounding_mode == rmNearestTiesToAway || - (rounding_mode == rmTowardPositive && !sign) || - (rounding_mode == rmTowardNegative && sign)) { - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) - makeNaN(false, sign); - else - category = fcInfinity; - return (opStatus) (opOverflow | opInexact); + if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) { + /* Infinity? 
*/ + if (rounding_mode == rmNearestTiesToEven || + rounding_mode == rmNearestTiesToAway || + (rounding_mode == rmTowardPositive && !sign) || + (rounding_mode == rmTowardNegative && sign)) { + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) + makeNaN(false, sign); + else + category = fcInfinity; + return static_cast<opStatus>(opOverflow | opInexact); + } } /* Otherwise we become the largest finite number. */ @@ -3518,13 +3541,15 @@ APInt IEEEFloat::convertIEEEFloatToAPInt() const { myexponent = ::exponentZero(S) + bias; mysignificand.fill(0); } else if (category == fcInfinity) { - if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { + if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly || + S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) llvm_unreachable("semantics don't support inf!"); - } myexponent = ::exponentInf(S) + bias; mysignificand.fill(0); } else { assert(category == fcNaN && "Unknown category!"); + if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) + llvm_unreachable("semantics don't support NaN!"); myexponent = ::exponentNaN(S) + bias; std::copy_n(significandParts(), mysignificand.size(), mysignificand.begin()); @@ -3605,6 +3630,16 @@ APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const { return convertIEEEFloatToAPInt<semFloatTF32>(); } +APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat6E3M2FN>(); +} + +APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt<semFloat6E2M3FN>(); +} + // This function creates an APInt that is just a bit map of the floating // point constant as it would appear in memory. It is not a conversion, // and treating the result as a normal integer is unlikely to be useful. 
@@ -3646,6 +3681,12 @@ APInt IEEEFloat::bitcastToAPInt() const { if (semantics == (const llvm::fltSemantics *)&semFloatTF32) return convertFloatTF32APFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN) + return convertFloat6E3M2FNAPFloatToAPInt(); + + if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN) + return convertFloat6E2M3FNAPFloatToAPInt(); + assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && "unknown format!"); return convertF80LongDoubleAPFloatToAPInt(); @@ -3862,6 +3903,14 @@ void IEEEFloat::initFromFloatTF32APInt(const APInt &api) { initFromIEEEAPInt<semFloatTF32>(api); } +void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat6E3M2FN>(api); +} + +void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) { + initFromIEEEAPInt<semFloat6E2M3FN>(api); +} + /// Treat api as containing the bits of a floating point number. void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { assert(api.getBitWidth() == Sem->sizeInBits); @@ -3891,6 +3940,10 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { return initFromFloat8E4M3B11FNUZAPInt(api); if (Sem == &semFloatTF32) return initFromFloatTF32APInt(api); + if (Sem == &semFloat6E3M2FN) + return initFromFloat6E3M2FNAPInt(api); + if (Sem == &semFloat6E2M3FN) + return initFromFloat6E2M3FNAPInt(api); llvm_unreachable(nullptr); } @@ -4328,7 +4381,8 @@ int IEEEFloat::getExactLog2Abs() const { bool IEEEFloat::isSignaling() const { if (!isNaN()) return false; - if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly || + semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) return false; // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the @@ -4387,6 +4441,10 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) { // nextUp(getLargest()) == NAN makeNaN(); break; + } else if (semantics->nonFiniteBehavior == + 
fltNonfiniteBehavior::FiniteOnly) { + // nextUp(getLargest()) == getLargest() + break; } else { // nextUp(getLargest()) == INFINITY APInt::tcSet(significandParts(), 0, partCount()); @@ -4477,6 +4535,9 @@ APFloatBase::ExponentType IEEEFloat::exponentZero() const { } void IEEEFloat::makeInf(bool Negative) { + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly) + llvm_unreachable("This floating point format does not support Inf"); + if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) { // There is no Inf, so make NaN instead. makeNaN(false, Negative); diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp index 6e4dda8351a1b1..7007d944801a75 100644 --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -723,11 +723,13 @@ TEST(APFloatTest, IsSmallestNormalized) { EXPECT_FALSE(APFloat::getZero(Semantics, false).isSmallestNormalized()); EXPECT_FALSE(APFloat::getZero(Semantics, true).isSmallestNormalized()); - EXPECT_FALSE(APFloat::getInf(Semantics, false).isSmallestNormalized()); - EXPECT_FALSE(APFloat::getInf(Semantics, true).isSmallestNormalized()); + if (APFloat::hasNanOrInf(Semantics)) { + EXPECT_FALSE(APFloat::getInf(Semantics, false).isSmallestNormalized()); + EXPECT_FALSE(APFloat::getInf(Semantics, true).isSmallestNormalized()); - EXPECT_FALSE(APFloat::getQNaN(Semantics).isSmallestNormalized()); - EXPECT_FALSE(APFloat::getSNaN(Semantics).isSmallestNormalized()); + EXPECT_FALSE(APFloat::getQNaN(Semantics).isSmallestNormalized()); + EXPECT_FALSE(APFloat::getSNaN(Semantics).isSmallestNormalized()); + } EXPECT_FALSE(APFloat::getLargest(Semantics).isSmallestNormalized()); EXPECT_FALSE(APFloat::getLargest(Semantics, true).isSmallestNormalized()); @@ -1823,6 +1825,9 @@ TEST(APFloatTest, getLargest) { 30, APFloat::getLargest(APFloat::Float8E4M3B11FNUZ()).convertToDouble()); EXPECT_EQ(3.40116213421e+38f, APFloat::getLargest(APFloat::FloatTF32()).convertToFloat()); + EXPECT_EQ(28, 
APFloat::getLargest(APFloat::Float6E3M2FN()).convertToDouble()); + EXPECT_EQ(7.5, + APFloat::getLargest(APFloat::Float6E2M3FN()).convertToDouble()); } TEST(APFloatTest, getSmallest) { @@ -1881,6 +1886,20 @@ TEST(APFloatTest, getSmallest) { EXPECT_TRUE(test.isFiniteNonZero()); EXPECT_TRUE(test.isDenormal()); EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + test = APFloat::getSmallest(APFloat::Float6E3M2FN(), false); + expected = APFloat(APFloat::Float6E3M2FN(), "0x0.1p0"); + EXPECT_FALSE(test.isNegative()); + EXPECT_TRUE(test.isFiniteNonZero()); + EXPECT_TRUE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + test = APFloat::getSmallest(APFloat::Float6E2M3FN(), false); + expected = APFloat(APFloat::Float6E2M3FN(), "0x0.2p0"); + EXPECT_FALSE(test.isNegative()); + EXPECT_TRUE(test.isFiniteNonZero()); + EXPECT_TRUE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); } TEST(APFloatTest, getSmallestNormalized) { @@ -1963,6 +1982,21 @@ TEST(APFloatTest, getSmallestNormalized) { EXPECT_FALSE(test.isDenormal()); EXPECT_TRUE(test.bitwiseIsEqual(expected)); EXPECT_TRUE(test.isSmallestNormalized()); + test = APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false); + expected = APFloat(APFloat::Float6E3M2FN(), "0x1p-2"); + EXPECT_FALSE(test.isNegative()); + EXPECT_TRUE(test.isFiniteNonZero()); + EXPECT_FALSE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + EXPECT_TRUE(test.isSmallestNormalized()); + + test = APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false); + expected = APFloat(APFloat::Float6E2M3FN(), "0x1p0"); + EXPECT_FALSE(test.isNegative()); + EXPECT_TRUE(test.isFiniteNonZero()); + EXPECT_FALSE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + EXPECT_TRUE(test.isSmallestNormalized()); } TEST(APFloatTest, getZero) { @@ -1996,7 +2030,11 @@ TEST(APFloatTest, getZero) { {&APFloat::Float8E4M3B11FNUZ(), false, false, {0, 0}, 1}, {&APFloat::Float8E4M3B11FNUZ(), true, false, {0, 0}, 1}, 
{&APFloat::FloatTF32(), false, true, {0, 0}, 1}, - {&APFloat::FloatTF32(), true, true, {0x40000ULL, 0}, 1}}; + {&APFloat::FloatTF32(), true, true, {0x40000ULL, 0}, 1}, + {&APFloat::Float6E3M2FN(), false, true, {0, 0}, 1}, + {&APFloat::Float6E3M2FN(), true, true, {0x20ULL, 0}, 1}, + {&APFloat::Float6E2M3FN(), false, true, {0, 0}, 1}, + {&APFloat::Float6E2M3FN(), true, true, {0x20ULL, 0}, 1}}; const unsigned NumGetZeroTests = std::size(GetZeroTest); for (unsigned i = 0; i < NumGetZeroTests; ++i) { APFloat test = APFloat::getZero(*GetZeroTest[i].semantics, @@ -5161,6 +5199,90 @@ TEST(APFloatTest, Float8ExhaustivePair) { } } +TEST(APFloatTest, Float6ExhaustivePair) { + // Test each pair of 6-bit floats with non-standard semantics + for (APFloat::Semantics Sem : + {APFloat::S_Float6E3M2FN, APFloat::S_Float6E2M3FN}) { + const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem); + for (int i = 1; i < 64; i++) { + for (int j = 1; j < 64; j++) { + SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) + + ",j=" + std::to_string(j)); + APFloat x(S, APInt(6, i)); + APFloat y(S, APInt(6, j)); + + bool losesInfo; + APFloat x16 = x; + x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_FALSE(losesInfo); + APFloat y16 = y; + y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_FALSE(losesInfo); + + // Add + APFloat z = x; + z.add(y, APFloat::rmNearestTiesToEven); + APFloat z16 = x16; + z16.add(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Subtract + z = x; + z.subtract(y, APFloat::rmNearestTiesToEven); + z16 = x16; + z16.subtract(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Multiply + z = x; + z.multiply(y, 
APFloat::rmNearestTiesToEven); + z16 = x16; + z16.multiply(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Skip divide by 0 + if (j == 0 || j == 32) + continue; + + // Divide + z = x; + z.divide(y, APFloat::rmNearestTiesToEven); + z16 = x16; + z16.divide(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Mod + z = x; + z.mod(y); + z16 = x16; + z16.mod(y16); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Remainder + z = x; + z.remainder(y); + z16 = x16; + z16.remainder(y16); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + } + } + } +} + TEST(APFloatTest, ConvertE4M3FNToE5M2) { bool losesInfo; APFloat test(APFloat::Float8E4M3FN(), "1.0"); @@ -6620,28 +6742,39 @@ TEST(APFloatTest, getExactLog2) { EXPECT_EQ(INT_MIN, APFloat(Semantics, "-3.0").getExactLog2()); EXPECT_EQ(INT_MIN, APFloat(Semantics, "3.0").getExactLog2Abs()); EXPECT_EQ(INT_MIN, APFloat(Semantics, "-3.0").getExactLog2Abs()); - EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat(Semantics, "-8.0").getExactLog2()); - EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2()); - EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2Abs()); - EXPECT_EQ(INT_MIN, APFloat(Semantics, "-0.25").getExactLog2()); - EXPECT_EQ(-2, APFloat(Semantics, "-0.25").getExactLog2Abs()); - EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2Abs()); - EXPECT_EQ(3, APFloat(Semantics, "-8.0").getExactLog2Abs()); + + if (I == APFloat::S_Float6E2M3FN) { + EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2()); + 
EXPECT_EQ(INT_MIN, APFloat(Semantics, "-4.0").getExactLog2()); + EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2Abs()); + EXPECT_EQ(2, APFloat(Semantics, "-4.0").getExactLog2Abs()); + } else { + EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2()); + EXPECT_EQ(INT_MIN, APFloat(Semantics, "-8.0").getExactLog2()); + EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2()); + EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2Abs()); + EXPECT_EQ(INT_MIN, APFloat(Semantics, "-0.25").getExactLog2()); + EXPECT_EQ(-2, APFloat(Semantics, "-0.25").getExactLog2Abs()); + EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2Abs()); + EXPECT_EQ(3, APFloat(Semantics, "-8.0").getExactLog2Abs()); + } EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, false).getExactLog2()); EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, true).getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2()); - EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, false).getExactLog2Abs()); EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, true).getExactLog2Abs()); - EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2Abs()); - EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2Abs()); - EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2Abs()); - EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2Abs()); + + if (APFloat::hasNanOrInf(Semantics)) { + EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2()); + EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2()); + EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2()); + EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2()); + + EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2Abs()); + EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, 
true).getExactLog2Abs()); + EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2Abs()); + EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2Abs()); + } EXPECT_EQ(INT_MIN, scalbn(One, MinExp - Precision - 1, APFloat::rmNearestTiesToEven) @@ -6660,4 +6793,311 @@ TEST(APFloatTest, getExactLog2) { } } +TEST(APFloatTest, Float6E3M2FNFromString) { + // Exactly representable + EXPECT_EQ(28, APFloat(APFloat::Float6E3M2FN(), "28").convertToDouble()); + // Round down to maximum value + EXPECT_EQ(28, APFloat(APFloat::Float6E3M2FN(), "32").convertToDouble()); + +#ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG + EXPECT_DEATH(APFloat(APFloat::Float6E3M2FN(), "inf"), + "This floating point format does not support Inf"); + EXPECT_DEATH(APFloat(APFloat::Float6E3M2FN(), "nan"), + "This floating point format does not support NaN"); +#endif +#endif + + EXPECT_TRUE(APFloat(APFloat::Float6E3M2FN(), "0").isPosZero()); + EXPECT_TRUE(APFloat(APFloat::Float6E3M2FN(), "-0").isNegZero()); +} + +TEST(APFloatTest, Float6E2M3FNFromString) { + // Exactly representable + EXPECT_EQ(7.5, APFloat(APFloat::Float6E2M3FN(), "7.5").convertToDouble()); + // Round down to maximum value + EXPECT_EQ(7.5, APFloat(APFloat::Float6E2M3FN(), "32").convertToDouble()); + +#ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG + EXPECT_DEATH(APFloat(APFloat::Float6E2M3FN(), "inf"), + "This floating point format does not support Inf"); + EXPECT_DEATH(APFloat(APFloat::Float6E2M3FN(), "nan"), + "This floating point format does not support NaN"); +#endif +#endif + + EXPECT_TRUE(APFloat(APFloat::Float6E2M3FN(), "0").isPosZero()); + EXPECT_TRUE(APFloat(APFloat::Float6E2M3FN(), "-0").isNegZero()); +} + +TEST(APFloatTest, ConvertE3M2FToE2M3F) { + bool losesInfo; + APFloat test(APFloat::Float6E3M2FN(), "1.0"); + APFloat::opStatus status = test.convert( + APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(1.0f, test.convertToFloat()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, 
APFloat::opOK); + + test = APFloat(APFloat::Float6E3M2FN(), "0.0"); + status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(0.0f, test.convertToFloat()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + // Test overflow + losesInfo = false; + test = APFloat(APFloat::Float6E3M2FN(), "28"); + status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(7.5f, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + EXPECT_EQ(status, APFloat::opInexact); + + // Test underflow + test = APFloat(APFloat::Float6E3M2FN(), ".0625"); + status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(0., test.convertToFloat()); + EXPECT_TRUE(losesInfo); + EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); + + // Testing inexact rounding to denormal number + losesInfo = false; + test = APFloat(APFloat::Float6E3M2FN(), "0.1875"); + status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(0.25, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); +} + +TEST(APFloatTest, ConvertE2M3FToE3M2F) { + bool losesInfo; + APFloat test(APFloat::Float6E2M3FN(), "1.0"); + APFloat::opStatus status = test.convert( + APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(1.0f, test.convertToFloat()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + test = APFloat(APFloat::Float6E2M3FN(), "0.0"); + status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(0.0f, test.convertToFloat()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + test = APFloat(APFloat::Float6E2M3FN(), ".125"); + status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(.125, test.convertToFloat()); + 
EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + // Test inexact rounding + losesInfo = false; + test = APFloat(APFloat::Float6E2M3FN(), "7.5"); + status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(8, test.convertToFloat()); + EXPECT_TRUE(losesInfo); + EXPECT_EQ(status, APFloat::opInexact); +} + +TEST(APFloatTest, Float6E3M2FNNext) { + APFloat test(APFloat::Float6E3M2FN(), APFloat::uninitialized); + APFloat expected(APFloat::Float6E3M2FN(), APFloat::uninitialized); + + // 1. NextUp of largest bit pattern is the same + test = APFloat::getLargest(APFloat::Float6E3M2FN()); + expected = APFloat::getLargest(APFloat::Float6E3M2FN()); + EXPECT_EQ(test.next(false), APFloat::opOK); + EXPECT_FALSE(test.isInfinity()); + EXPECT_FALSE(test.isZero()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 2. NextUp of smallest negative denormal is -0 + test = APFloat::getSmallest(APFloat::Float6E3M2FN(), true); + expected = APFloat::getZero(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(test.next(false), APFloat::opOK); + EXPECT_TRUE(test.isNegZero()); + EXPECT_FALSE(test.isPosZero()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 3. nextDown of negative of largest value is the same + test = APFloat::getLargest(APFloat::Float6E3M2FN(), true); + expected = test; + EXPECT_EQ(test.next(true), APFloat::opOK); + EXPECT_FALSE(test.isInfinity()); + EXPECT_FALSE(test.isZero()); + EXPECT_FALSE(test.isNaN()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 4. 
nextDown of +0 is smallest negative denormal + test = APFloat::getZero(APFloat::Float6E3M2FN(), false); + expected = APFloat::getSmallest(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(test.next(true), APFloat::opOK); + EXPECT_FALSE(test.isZero()); + EXPECT_TRUE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); +} + +TEST(APFloatTest, Float6E2M3FNNext) { + APFloat test(APFloat::Float6E2M3FN(), APFloat::uninitialized); + APFloat expected(APFloat::Float6E2M3FN(), APFloat::uninitialized); + + // 1. NextUp of largest bit pattern is the same + test = APFloat::getLargest(APFloat::Float6E2M3FN()); + expected = APFloat::getLargest(APFloat::Float6E2M3FN()); + EXPECT_EQ(test.next(false), APFloat::opOK); + EXPECT_FALSE(test.isInfinity()); + EXPECT_FALSE(test.isZero()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 2. NextUp of smallest negative denormal is -0 + test = APFloat::getSmallest(APFloat::Float6E2M3FN(), true); + expected = APFloat::getZero(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(test.next(false), APFloat::opOK); + EXPECT_TRUE(test.isNegZero()); + EXPECT_FALSE(test.isPosZero()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 3. nextDown of negative of largest value is the same + test = APFloat::getLargest(APFloat::Float6E2M3FN(), true); + expected = test; + EXPECT_EQ(test.next(true), APFloat::opOK); + EXPECT_FALSE(test.isInfinity()); + EXPECT_FALSE(test.isZero()); + EXPECT_FALSE(test.isNaN()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 4. 
nextDown of +0 is smallest negative denormal + test = APFloat::getZero(APFloat::Float6E2M3FN(), false); + expected = APFloat::getSmallest(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(test.next(true), APFloat::opOK); + EXPECT_FALSE(test.isZero()); + EXPECT_TRUE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); +} + +#ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG +TEST(APFloatTest, Float6E3M2FNGetInfNaN) { + EXPECT_DEATH(APFloat::getInf(APFloat::Float6E3M2FN()), + "This floating point format does not support Inf"); + EXPECT_DEATH(APFloat::getNaN(APFloat::Float6E3M2FN()), + "This floating point format does not support NaN"); +} + +TEST(APFloatTest, Float6E2M3FNGetInfNaN) { + EXPECT_DEATH(APFloat::getInf(APFloat::Float6E2M3FN()), + "This floating point format does not support Inf"); + EXPECT_DEATH(APFloat::getNaN(APFloat::Float6E2M3FN()), + "This floating point format does not support NaN"); +} +#endif +#endif + +TEST(APFloatTest, Float6E3M2FNToDouble) { + APFloat One(APFloat::Float6E3M2FN(), "1.0"); + EXPECT_EQ(1.0, One.convertToDouble()); + APFloat Two(APFloat::Float6E3M2FN(), "2.0"); + EXPECT_EQ(2.0, Two.convertToDouble()); + APFloat PosLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), false); + EXPECT_EQ(28., PosLargest.convertToDouble()); + APFloat NegLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(-28., NegLargest.convertToDouble()); + APFloat PosSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false); + EXPECT_EQ(0x1p-2, PosSmallest.convertToDouble()); + APFloat NegSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(-0x1p-2, NegSmallest.convertToDouble()); + + APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E3M2FN(), false); + EXPECT_TRUE(SmallestDenorm.isDenormal()); + EXPECT_EQ(0x0.1p0, SmallestDenorm.convertToDouble()); +} + +TEST(APFloatTest, Float6E2M3FNToDouble) { + APFloat One(APFloat::Float6E2M3FN(), "1.0"); + EXPECT_EQ(1.0, 
One.convertToDouble()); + APFloat Two(APFloat::Float6E2M3FN(), "2.0"); + EXPECT_EQ(2.0, Two.convertToDouble()); + APFloat PosLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), false); + EXPECT_EQ(7.5, PosLargest.convertToDouble()); + APFloat NegLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(-7.5, NegLargest.convertToDouble()); + APFloat PosSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false); + EXPECT_EQ(0x1p0, PosSmallest.convertToDouble()); + APFloat NegSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(-0x1p0, NegSmallest.convertToDouble()); + + APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E2M3FN(), false); + EXPECT_TRUE(SmallestDenorm.isDenormal()); + EXPECT_EQ(0x0.2p0, SmallestDenorm.convertToDouble()); +} + +TEST(APFloatTest, Float6E3M2FNToFloat) { + APFloat PosZero = APFloat::getZero(APFloat::Float6E3M2FN()); + APFloat PosZeroToFloat(PosZero.convertToFloat()); + EXPECT_TRUE(PosZeroToFloat.isPosZero()); + APFloat NegZero = APFloat::getZero(APFloat::Float6E3M2FN(), true); + APFloat NegZeroToFloat(NegZero.convertToFloat()); + EXPECT_TRUE(NegZeroToFloat.isNegZero()); + + APFloat One(APFloat::Float6E3M2FN(), "1.0"); + EXPECT_EQ(1.0F, One.convertToFloat()); + APFloat Two(APFloat::Float6E3M2FN(), "2.0"); + EXPECT_EQ(2.0F, Two.convertToFloat()); + + APFloat PosLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), false); + EXPECT_EQ(28., PosLargest.convertToFloat()); + APFloat NegLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(-28, NegLargest.convertToFloat()); + APFloat PosSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false); + EXPECT_EQ(0x1p-2, PosSmallest.convertToFloat()); + APFloat NegSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), true); + EXPECT_EQ(-0x1p-2, NegSmallest.convertToFloat()); + + APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E3M2FN(), false); + 
EXPECT_TRUE(SmallestDenorm.isDenormal()); + EXPECT_EQ(0x0.1p0, SmallestDenorm.convertToFloat()); +} + +TEST(APFloatTest, Float6E2M3FNToFloat) { + APFloat PosZero = APFloat::getZero(APFloat::Float6E2M3FN()); + APFloat PosZeroToFloat(PosZero.convertToFloat()); + EXPECT_TRUE(PosZeroToFloat.isPosZero()); + APFloat NegZero = APFloat::getZero(APFloat::Float6E2M3FN(), true); + APFloat NegZeroToFloat(NegZero.convertToFloat()); + EXPECT_TRUE(NegZeroToFloat.isNegZero()); + + APFloat One(APFloat::Float6E2M3FN(), "1.0"); + EXPECT_EQ(1.0F, One.convertToFloat()); + APFloat Two(APFloat::Float6E2M3FN(), "2.0"); + EXPECT_EQ(2.0F, Two.convertToFloat()); + + APFloat PosLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), false); + EXPECT_EQ(7.5, PosLargest.convertToFloat()); + APFloat NegLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(-7.5, NegLargest.convertToFloat()); + APFloat PosSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false); + EXPECT_EQ(0x1p0, PosSmallest.convertToFloat()); + APFloat NegSmallest = + APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), true); + EXPECT_EQ(-0x1p0, NegSmallest.convertToFloat()); + + APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E2M3FN(), false); + EXPECT_TRUE(SmallestDenorm.isDenormal()); + EXPECT_EQ(0x0.2p0, SmallestDenorm.convertToFloat()); +} } // namespace