diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 2299e4d48be15d543dd84dcbcdada5364f1d161c..0680c46e805662d55c6b304ab02324db18091328 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -758,15 +758,18 @@ template<> EIGEN_STRONG_INLINE Packet8f pldexp(const Packet8f& a, cons template<> EIGEN_STRONG_INLINE Packet4d pldexp(const Packet4d& a, const Packet4d& exponent) { // Build e=2^n by constructing the exponents in a 128-bit vector and // shifting them to where they belong in double-precision values. - Packet4i cst_1023 = pset1(1023); - __m128i emm0 = _mm256_cvtpd_epi32(exponent); + const Packet4i cst_1023 = pset1(1023); + const Packet4d e = pmin(pmax(exponent, + pset1(-1023.0)), + pset1(1024.0)); + __m128i emm0 = _mm256_cvtpd_epi32(e); emm0 = _mm_add_epi32(emm0, cst_1023); emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0)); __m128i lo = _mm_slli_epi64(emm0, 52); __m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52); - __m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0); - e = _mm256_insertf128_si256(e, hi, 1); - return pmul(a,_mm256_castsi256_pd(e)); + __m256i b = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0); + b = _mm256_insertf128_si256(b, hi, 1); + return pmul(a,_mm256_castsi256_pd(b)); } template<> EIGEN_STRONG_INLINE float predux(const Packet8f& a) diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index fa43d88099d0fa6e30af4f01c2f68eb594cfcc8b..e850d2ae731db80c2a07bfc80222fd4a27d39479 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -919,8 +919,11 @@ template<> EIGEN_STRONG_INLINE Packet16f pldexp(const Packet16f& a, c template<> EIGEN_STRONG_INLINE Packet8d pldexp(const Packet8d& a, const Packet8d& exponent) { // Build e=2^n by constructing the exponents in a 256-bit vector and // shifting them to where they belong in double-precision values. - Packet8i cst_1023 = pset1(1023); - __m256i emm0 = _mm512_cvtpd_epi32(exponent); + const Packet8i cst_1023 = pset1(1023); + const Packet8d e = pmin(pmax(exponent, + pset1(-1023.0)), + pset1(1024.0)); + __m256i emm0 = _mm512_cvtpd_epi32(e); emm0 = _mm256_add_epi32(emm0, cst_1023); emm0 = _mm256_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0)); __m256i lo = _mm256_slli_epi64(emm0, 52); diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index fdf4f1e9ce77f6bc70ceb8841b3897ae3895ed27..54fafc91cb313219fedf7d0587e2fe5691dc954e 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -2455,7 +2455,10 @@ static inline Packet2l ConvertToPacket2l(const Packet2d& x) { template<> EIGEN_STRONG_INLINE Packet2d pldexp(const Packet2d& a, const Packet2d& exponent) { // build 2^n - Packet2l emm0 = ConvertToPacket2l(exponent); + const Packet2d e = pmin(pmax(exponent, + pset1(-1023.0)), + pset1(1024.0)); + Packet2l emm0 = ConvertToPacket2l(e); #ifdef __POWER8_VECTOR__ const Packet2l p2l_1023 = { 1023, 1023 }; diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index e3e91f4ab53cb41887af76334adaa18852aee1d4..7b5f1bd57cb6496637265b213814d8a7872d5699 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -50,8 +50,12 @@ pldexp_float(Packet a, Packet exponent) { typedef typename unpacket_traits::integer_packet PacketI; const Packet cst_127 = pset1(127.f); + // Add offset and clamp. + const Packet e = pmin(pmax(padd(exponent, cst_127), + pset1(0.0f)), + pset1(255.0f)); // return a * 2^exponent - PacketI ei = pcast(padd(exponent, cst_127)); + PacketI ei = pcast(e); return pmul(a, preinterpret(plogical_shift_left<23>(ei))); } @@ -60,8 +64,12 @@ pldexp_double(Packet a, Packet exponent) { typedef typename unpacket_traits::integer_packet PacketI; const Packet cst_1023 = pset1(1023.0); + // Add offset and clamp. + const Packet e = pmin(pmax(padd(exponent, cst_1023), + pset1(0.0)), + pset1(2047.0)); // return a * 2^exponent - PacketI ei = pcast(padd(exponent, cst_1023)); + PacketI ei = pcast(e); return pmul(a, preinterpret(plogical_shift_left<52>(ei))); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 05d9f8edd69553e7f24b4d9db6e2b64254a28531..0cca426df09cdc2203e2c7ac25dc5115e8d3c229 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -908,10 +908,13 @@ template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, cons // supported by SSE, and has more range than is needed for exponents. template<> EIGEN_STRONG_INLINE Packet2d pldexp(const Packet2d& a, const Packet2d& exponent) { const Packet2d cst_1023 = pset1(1023.0); - // Add exponent offset. - Packet4i ei = _mm_cvtpd_epi32(padd(exponent, cst_1023)); - // Convert to exponents to int64 and swizzle to the low-order 32 bits. - ei = vec4i_swizzle1(ei, 0, 3, 1, 3); + // Add exponent offset and clamp. + const Packet2d e = pmin(pmax(padd(exponent, cst_1023), + pset1(0.0)), + pset1(2047.0)); + + // Convert exponents to int64 and swizzle to the low-order 32 bits. + const Packet4i ei = vec4i_swizzle1(_mm_cvtpd_epi32(e), 0, 3, 1, 3); // return a * 2^exponent return pmul(a, _mm_castsi128_pd(_mm_slli_epi64(ei, 52))); } diff --git a/test/packetmath.cpp b/test/packetmath.cpp index b7562e6a1a525d85a7fd9cc23a39b5b30bedb6e1..5dc016b95d8e5b953188c14e4b16daf7a9f9e4b7 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -573,10 +573,20 @@ void packetmath_real() { data2[i] = Scalar(internal::random(-1, 1)); } for (int i = 0; i < PacketSize; ++i) { - data1[i+PacketSize] = Scalar(internal::random(0, 4)); - data2[i+PacketSize] = Scalar(internal::random(0, 4)); + data1[i+PacketSize] = Scalar(internal::random(-4, 4)); + data2[i+PacketSize] = Scalar(internal::random(-4, 4)); } CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp); + if (PacketTraits::HasExp) { + data1[0] = Scalar(-1); + data1[PacketSize] = Scalar(std::numeric_limits::min_exponent-10); + CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp); + data1[PacketSize] = Scalar(std::numeric_limits::max_exponent+10); + CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp); + data1[0] = NumTraits::quiet_NaN(); + CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp); + VERIFY((numext::isnan)(data2[0])); + } for (int i = 0; i < size; ++i) { data1[i] = Scalar(internal::random(-1, 1) * std::pow(10., internal::random(-6, 6)));