diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index b02a9f20baffd8e7de84fc228971b77074586cbc..db2895e5805faa5ba1092bc265b924a7c5c2ebe6 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -913,13 +913,13 @@ pfrexp_float(const Packet& a, Packet& exponent); * It is expected to be called by implementers of template<> pldexp. */ template EIGEN_STRONG_INLINE Packet -pldexp_float(Packet a, Packet exponent); +pldexp_float(const Packet& a, const Packet& exponent); /** Default implementation of pldexp for double. * It is expected to be called by implementers of template<> pldexp. */ template EIGEN_STRONG_INLINE Packet -pldexp_double(Packet a, Packet exponent); +pldexp_double(const Packet& a, const Packet& exponent); } // end namespace internal diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index e3e91f4ab53cb41887af76334adaa18852aee1d4..8b037fa4f07c895c2f5ada6425c1edd3cf0d6b9e 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -40,30 +40,184 @@ pfrexp_double(const Packet& a, Packet& exponent) { typedef typename unpacket_traits::integer_packet PacketI; const Packet cst_1022d = pset1(1022.0); const Packet cst_half = pset1(0.5); - const Packet cst_inv_mant_mask = pset1frombits(static_cast(~0x7ff0000000000000ull)); + const Packet cst_inv_mant_mask = pset1frombits(static_cast(~0x7ff0000000000000ull)); exponent = psub(pcast(plogical_shift_right<52>(preinterpret(pabs(a)))), cst_1022d); return por(pand(a, cst_inv_mant_mask), cst_half); } -template EIGEN_STRONG_INLINE Packet -pldexp_float(Packet a, Packet exponent) -{ +// Safely applies ldexp, correctly handles overflows, underflows and denormals. +// Assumes IEEE floating point format. +template +struct pldexp_safe_impl { typedef typename unpacket_traits::integer_packet PacketI; - const Packet cst_127 = pset1(127.f); - // return a * 2^exponent - PacketI ei = pcast(padd(exponent, cst_127)); - return pmul(a, preinterpret(plogical_shift_left<23>(ei))); -} + typedef typename unpacket_traits::type Scalar; + typedef typename unpacket_traits::type ScalarI; + typedef typename make_unsigned::type Mask; + enum { + TotalBits = sizeof(Scalar) * CHAR_BIT, + MantissaBits = std::numeric_limits::digits - 1, + ExponentBits = int(TotalBits) - int(MantissaBits) - 1 + }; + + static EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC + Packet run(const Packet& a, const Packet& exponent) { + const Packet sign_mask = pset1frombits(Mask(1) << (int(TotalBits) - 1)); + const Packet fraction_mask = pset1frombits( (Mask(1) << int(MantissaBits)) - 1); + + // Normalize (a, e) + // if |a| < min_value, set a_normal = |a| * 2^d, e = exponent - d, + // otherwise, set a_normal = |a|, e = exponent + const Packet min_value = pset1frombits(Mask(1) << int(MantissaBits)); + const PacketI denormal_shift = pset1(-int(MantissaBits)-1); // -d + const Packet a_sign = pand(a, sign_mask); + const Packet a_abs = pxor(a, a_sign); + const Packet a_is_denormal = pcmp_lt(a_abs, min_value); + const Packet a_normal = pselect(a_is_denormal, + pmul(a_abs, pset1(static_cast(Mask(1) << (int(MantissaBits) + 1)))), + a_abs); + const PacketI cst_zeroi = pzero(denormal_shift); + PacketI e = padd(pcast(exponent), + pselect(preinterpret(a_is_denormal), denormal_shift, cst_zeroi)); + + // Extract mantissa and exponent from a_normal + const Packet f = pand(a_normal, fraction_mask); + const PacketI a_exponent = plogical_shift_right(preinterpret(a_normal)); + + // Since a_normal is normal, a is zero iff a_exponent is zero. + // + // If a_exponent is zero, + // set e = 0, + // If a_exponent is (2^ExponentBits-1), a is either inf or NaN, + // set e = a_exponent (we don't want a negative exponent e to pull away) + // Otherwise + // set e = min(e + a_exponent, 2^ExponentBits-1) + // + // Note that e is now a biased exponent. + const PacketI max_exponent = pset1((ScalarI(1) << int(ExponentBits)) - 1); + e = pselect(pcmp_eq(a_exponent, cst_zeroi), cst_zeroi, + pselect(pcmp_eq(a_exponent, max_exponent), max_exponent, + pmin(padd(e, a_exponent), max_exponent))); + + // If e > 0, + // out = (a_sign | (e << MantissaBits) | a_fraction) + // If e <= -d, + // out = (a_sign | 0 ) + // Otherwise, we have a denormal + // out = (a_sign | (e + d << MantissaBits) | a_fraction) * 2^(-d) + const PacketI out_is_normal = pcmp_lt(cst_zeroi, e); + e = pselect(out_is_normal, e, psub(e, denormal_shift)); + Packet out = por(f, preinterpret(plogical_shift_left(e))); + const Packet factor = pset1frombits( ((Mask(1)<<(int(ExponentBits) - 1)) - int(MantissaBits) - 2) << int(MantissaBits)); + out = pselect(preinterpret(pcmp_le(e, cst_zeroi)), + pzero(a), + pselect(preinterpret(out_is_normal), out, pmul(out, factor))); + return por(a_sign, out); + } +}; + +// Safely applies ldexp, correctly handles overflows, underflows and denormals. +// Assumes IEEE floating point format. +template +struct pldexp_fourmul_impl { + typedef typename unpacket_traits::integer_packet PacketI; + typedef typename unpacket_traits::type Scalar; + typedef typename unpacket_traits::type ScalarI; + enum { + TotalBits = sizeof(Scalar) * CHAR_BIT, + MantissaBits = std::numeric_limits::digits - 1, + ExponentBits = int(TotalBits) - int(MantissaBits) - 1 + }; + + static EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC + Packet run(const Packet& a, const Packet& exponent) { + // Since 'a' and output can be denormal, valid range of exponent is + // -255-23 -> 255+23 + // + // Set e = max(min(exponent, 278), -278); + // + // To avoid overflows, we will divide the exponent by 4 and multiply by + // 4 factors: + // b = floor(e/4) + // out = ((((a * 2^(b)) * 2^(b)) * 2^(b)) * 2^(e-3*b)) + // This will avoid any intermediate overflows and correctly handle 0, inf, + // NaN cases. + const PacketI max_exponent = pset1((ScalarI(1)<((ScalarI(1)<<(int(ExponentBits)-1)) - ScalarI(1)); // 127 + const PacketI e = pmax(pmin(pcast(exponent), max_exponent), pnegate(max_exponent)); + const PacketI b = parithmetic_shift_right<2>(e); // floor(e/4); + Packet c = preinterpret(plogical_shift_left(padd(b, bias))); // 2^b + Packet out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b) + c = preinterpret(plogical_shift_left(padd(psub(psub(psub(e, b), b), b), bias))); // 2^(e-3*b) + out = pmul(out, c); + return out; + } +}; + +// Explicitly multiplies +// a * 2^e +// clamping e to the range +// [std::numeric_limits::min_exponent-2, std::numeric_limits::max_exponent] +// +// This will prematurely over/under-flow if +// a is small, e is big +// a is big, e is small +// +// Assumes IEEE floating point format +template +struct pldexp_clamp_impl { + typedef typename unpacket_traits::integer_packet PacketI; + typedef typename unpacket_traits::type Scalar; + typedef typename unpacket_traits::type ScalarI; + enum { + TotalBits = sizeof(Scalar) * CHAR_BIT, + MantissaBits = std::numeric_limits::digits - 1, + ExponentBits = int(TotalBits) - int(MantissaBits) - 1 + }; + + static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC + Packet run(const Packet& a, const Packet& exponent) { + const ScalarI scalar_bias = (ScalarI(1)<<(int(ExponentBits) - 1)) - ScalarI(1); + const PacketI bias = pset1(scalar_bias); // 127 + const ScalarI scalar_limit = (ScalarI(1)<(scalar_limit); // 255 + // restrict between 0 and 255 + const PacketI e = pmin(pmax(padd(pcast(exponent), bias), pzero(limit)), limit); // e + 127 + return pmul(a, preinterpret(plogical_shift_left(e))); + } +}; + +// Naively applies ldexp, assumes exponent is in the valid range of +// [std::numeric_limits::min_exponent-2, std::numeric_limits::max_exponent] +// +// Assumes IEEE floating point format +template +struct pldexp_unsafe_impl { + typedef typename unpacket_traits::integer_packet PacketI; + typedef typename unpacket_traits::type Scalar; + typedef typename unpacket_traits::type ScalarI; + enum { + TotalBits = sizeof(Scalar) * CHAR_BIT, + MantissaBits = std::numeric_limits::digits - 1, + ExponentBits = int(TotalBits) - int(MantissaBits) - 1 + }; + + static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC + Packet run(const Packet& a, const Packet& exponent) { + const ScalarI scalar_bias = (ScalarI(1)<<(int(ExponentBits) - 1)) - ScalarI(1); + const PacketI bias = pset1(scalar_bias); // 127 + // restrict between 0 and 255 + const PacketI e = padd(pcast(exponent), bias); // e + 127 + return pmul(a, preinterpret(plogical_shift_left(e))); + } +}; + +template EIGEN_STRONG_INLINE Packet +pldexp_float(const Packet& a, const Packet& exponent) +{ return pldexp_fourmul_impl::run(a, exponent); } template EIGEN_STRONG_INLINE Packet pldexp_double(Packet a, Packet exponent) -{ - typedef typename unpacket_traits::integer_packet PacketI; - const Packet cst_1023 = pset1(1023.0); - // return a * 2^exponent - PacketI ei = pcast(padd(exponent, cst_1023)); - return pmul(a, preinterpret(plogical_shift_left<52>(ei))); -} +{ return pldexp_fourmul_impl::run(a, exponent); } // Natural or base 2 logarithm. // Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2) diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h index a623f54cb17245b612bdc83935a3e587dc74e827..f461eb505cb2b3053f1807dadfe099b94ed9492e 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -28,7 +28,7 @@ template EIGEN_STRONG_INLINE Packet pfrexp_double(const Packet& a, Packet& exponent); template EIGEN_STRONG_INLINE Packet -pldexp_float(Packet a, Packet exponent); +pldexp_float(const Packet& a, const Packet& exponent); /** \internal \returns log(x) for single precision float */ template diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 05d9f8edd69553e7f24b4d9db6e2b64254a28531..72a572a70e7cfbd80ad442fce6ce5886f1da37d4 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -399,21 +399,6 @@ template<> EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, con } #endif -template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return _mm_cmple_ps(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return _mm_cmplt_ps(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return _mm_cmpnge_ps(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return _mm_cmpeq_ps(a,b); } - -template<> EIGEN_STRONG_INLINE Packet2d pcmp_le(const Packet2d& a, const Packet2d& b) { return _mm_cmple_pd(a,b); } -template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt(const Packet2d& a, const Packet2d& b) { return _mm_cmplt_pd(a,b); } -template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) { return _mm_cmpnge_pd(a,b); } -template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); } - -template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return _mm_cmplt_epi32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return _mm_cmpeq_epi32(a,b); } -template<> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { return _mm_cmpeq_epi8(a,b); } - - template<> EIGEN_STRONG_INLINE Packet4i ptrue(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); } template<> EIGEN_STRONG_INLINE Packet16b ptrue(const Packet16b& a) { return _mm_cmpeq_epi8(a, a); } template<> EIGEN_STRONG_INLINE Packet4f @@ -447,6 +432,21 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, con template<> EIGEN_STRONG_INLINE Packet2d pandnot(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(b,a); } template<> EIGEN_STRONG_INLINE Packet4i pandnot(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(b,a); } +template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return _mm_cmple_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return _mm_cmplt_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return _mm_cmpnge_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return _mm_cmpeq_ps(a,b); } + +template<> EIGEN_STRONG_INLINE Packet2d pcmp_le(const Packet2d& a, const Packet2d& b) { return _mm_cmple_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt(const Packet2d& a, const Packet2d& b) { return _mm_cmplt_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) { return _mm_cmpnge_pd(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); } + +template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return _mm_cmplt_epi32(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return _mm_cmpeq_epi32(a,b); } +template<> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { return _mm_cmpeq_epi8(a,b); } +template<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return por(pcmp_lt(a,b), pcmp_eq(a,b)); } + template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63 // There appears to be a bug in GCC, by which the optimizer may diff --git a/test/packetmath.cpp b/test/packetmath.cpp index b7562e6a1a525d85a7fd9cc23a39b5b30bedb6e1..5dc016b95d8e5b953188c14e4b16daf7a9f9e4b7 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -573,10 +573,20 @@ void packetmath_real() { data2[i] = Scalar(internal::random(-1, 1)); } for (int i = 0; i < PacketSize; ++i) { - data1[i+PacketSize] = Scalar(internal::random(0, 4)); - data2[i+PacketSize] = Scalar(internal::random(0, 4)); + data1[i+PacketSize] = Scalar(internal::random(-4, 4)); + data2[i+PacketSize] = Scalar(internal::random(-4, 4)); } CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp); + if (PacketTraits::HasExp) { + data1[0] = Scalar(-1); + data1[PacketSize] = Scalar(std::numeric_limits::min_exponent-10); + CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp); + data1[PacketSize] = Scalar(std::numeric_limits::max_exponent+10); + CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp); + data1[0] = NumTraits::quiet_NaN(); + CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp); + VERIFY((numext::isnan)(data2[0])); + } for (int i = 0; i < size; ++i) { data1[i] = Scalar(internal::random(-1, 1) * std::pow(10., internal::random(-6, 6)));