diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index b02a9f20baffd8e7de84fc228971b77074586cbc..db2895e5805faa5ba1092bc265b924a7c5c2ebe6 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -913,13 +913,13 @@ pfrexp_float(const Packet& a, Packet& exponent);
   * It is expected to be called by implementers of template<> pldexp.
   */
 template<typename Packet> EIGEN_STRONG_INLINE Packet
-pldexp_float(Packet a, Packet exponent);
+pldexp_float(const Packet& a, const Packet& exponent);
 
 /** Default implementation of pldexp for double.
   * It is expected to be called by implementers of template<> pldexp.
   */
 template<typename Packet> EIGEN_STRONG_INLINE Packet
-pldexp_double(Packet a, Packet exponent);
+pldexp_double(const Packet& a, const Packet& exponent);
 
 } // end namespace internal
 
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index e3e91f4ab53cb41887af76334adaa18852aee1d4..8b037fa4f07c895c2f5ada6425c1edd3cf0d6b9e 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -40,30 +40,184 @@ pfrexp_double(const Packet& a, Packet& exponent) {
   typedef typename unpacket_traits<Packet>::integer_packet PacketI;
   const Packet cst_1022d = pset1<Packet>(1022.0);
   const Packet cst_half = pset1<Packet>(0.5);
-  const Packet cst_inv_mant_mask  = pset1frombits<Packet>(static_cast<uint64_t>(~0x7ff0000000000000ull));
+  const Packet cst_inv_mant_mask  = pset1frombits<Packet, uint64_t>(static_cast<uint64_t>(~0x7ff0000000000000ull));
   exponent = psub(pcast<PacketI,Packet>(plogical_shift_right<52>(preinterpret<PacketI>(pabs<Packet>(a)))), cst_1022d);
   return por(pand(a, cst_inv_mant_mask), cst_half);
 }
 
-template<typename Packet> EIGEN_STRONG_INLINE Packet
-pldexp_float(Packet a, Packet exponent)
-{
+// Safely applies ldexp, correctly handles overflows, underflows and denormals.
+// Assumes IEEE floating point format.
+template<typename Packet>
+struct pldexp_safe_impl {
   typedef typename unpacket_traits<Packet>::integer_packet PacketI;
-  const Packet cst_127 = pset1<Packet>(127.f);
-  // return a * 2^exponent
-  PacketI ei = pcast<Packet,PacketI>(padd(exponent, cst_127));
-  return pmul(a, preinterpret<Packet>(plogical_shift_left<23>(ei)));
-}
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  typedef typename unpacket_traits<PacketI>::type ScalarI;
+  typedef typename make_unsigned<ScalarI>::type Mask;
+  enum {
+    TotalBits = sizeof(Scalar) * CHAR_BIT,
+    MantissaBits = std::numeric_limits<Scalar>::digits - 1,
+    ExponentBits = int(TotalBits) - int(MantissaBits) - 1
+  };
+   
+  static EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC
+  Packet run(const Packet& a, const Packet& exponent) {
+    const Packet sign_mask = pset1frombits<Packet, Mask>(Mask(1) << (int(TotalBits) - 1));
+    const Packet fraction_mask = pset1frombits<Packet, Mask>( (Mask(1) << int(MantissaBits)) - 1);
+
+    // Normalize (a, e)
+    //   if |a| < min_value, set a_normal = |a| * 2^d, e = exponent - d,
+    //   otherwise,          set a_normal = |a|,       e = exponent
+    const Packet min_value = pset1frombits<Packet, Mask>(Mask(1) << int(MantissaBits));
+    const PacketI denormal_shift = pset1<PacketI>(-int(MantissaBits)-1);  // -d
+    const Packet a_sign = pand(a, sign_mask);
+    const Packet a_abs = pxor(a, a_sign);
+    const Packet a_is_denormal = pcmp_lt(a_abs, min_value);
+    const Packet a_normal = pselect(a_is_denormal, 
+                                    pmul(a_abs, pset1<Packet>(static_cast<Scalar>(Mask(1) << (int(MantissaBits) + 1)))), 
+                                    a_abs);
+    const PacketI cst_zeroi = pzero(denormal_shift);
+    PacketI e = padd(pcast<Packet, PacketI>(exponent), 
+                     pselect(preinterpret<PacketI>(a_is_denormal), denormal_shift, cst_zeroi));
+    
+    // Extract mantissa and exponent from a_normal
+    const Packet f = pand(a_normal, fraction_mask);
+    const PacketI a_exponent = plogical_shift_right<int(MantissaBits)>(preinterpret<PacketI>(a_normal));
+    
+    // Since a_normal is normal, a is zero iff a_exponent is zero.
+    //
+    // If a_exponent is zero, 
+    //    set e = 0, 
+    // If a_exponent is (2^ExponentBits-1), a is either inf or NaN, 
+    //    set e = a_exponent  (we don't want a negative exponent e to pull away)
+    // Otherwise 
+    //    set e = min(e + a_exponent, 2^ExponentBits-1)
+    //
+    // Note that e is now a biased exponent.
+    const PacketI max_exponent = pset1<PacketI>((ScalarI(1) << int(ExponentBits)) - 1);
+    e = pselect(pcmp_eq(a_exponent, cst_zeroi), cst_zeroi, 
+                pselect(pcmp_eq(a_exponent, max_exponent), max_exponent,
+                        pmin(padd(e, a_exponent), max_exponent)));
+    
+    // If e > 0, 
+    //    out = (a_sign | (e << MantissaBits) | a_fraction)
+    // If e <= -d,
+    //    out = (a_sign | 0 )
+    // Otherwise, we have a denormal
+    //    out =  (a_sign | (e + d << MantissaBits) | a_fraction) * 2^(-d)
+    const PacketI out_is_normal = pcmp_lt(cst_zeroi, e);
+    e = pselect(out_is_normal, e, psub(e, denormal_shift));
+    Packet out = por(f, preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(e)));
+    const Packet factor = pset1frombits<Packet, Mask>( ((Mask(1)<<(int(ExponentBits) - 1)) - int(MantissaBits) - 2) << int(MantissaBits));
+    out = pselect(preinterpret<Packet>(pcmp_le(e, cst_zeroi)), 
+                  pzero(a), 
+                  pselect(preinterpret<Packet>(out_is_normal), out, pmul(out, factor)));
+    return por(a_sign, out);
+  }
+};
+
+// Safely applies ldexp, correctly handles overflows, underflows and denormals.
+// Assumes IEEE floating point format.
+template<typename Packet>
+struct pldexp_fourmul_impl {
+  typedef typename unpacket_traits<Packet>::integer_packet PacketI;
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  typedef typename unpacket_traits<PacketI>::type ScalarI;
+  enum {
+    TotalBits = sizeof(Scalar) * CHAR_BIT,
+    MantissaBits = std::numeric_limits<Scalar>::digits - 1,
+    ExponentBits = int(TotalBits) - int(MantissaBits) - 1
+  };
+   
+  static EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC
+  Packet run(const Packet& a, const Packet& exponent) {
+    // Since 'a' and output can be denormal, valid range of exponent is
+    //   -255-23 -> 255+23
+    //
+    // Set e = max(min(exponent, 278), -278);
+    //
+    // To avoid overflows, we will divide the exponent by 4 and multiply by
+    // 4 factors:
+    //   b = floor(e/4)
+    //   out = ((((a * 2^(b)) * 2^(b)) * 2^(b)) * 2^(e-3*b))
+    // This will avoid any intermediate overflows and correctly handle 0, inf,
+    // NaN cases.
+    const PacketI max_exponent = pset1<PacketI>((ScalarI(1)<<int(ExponentBits)) + ScalarI(MantissaBits) - ScalarI(1));  // 278
+    const PacketI bias = pset1<PacketI>((ScalarI(1)<<(int(ExponentBits)-1)) - ScalarI(1));  // 127
+    const PacketI e = pmax(pmin(pcast<Packet, PacketI>(exponent), max_exponent), pnegate(max_exponent));
+    const PacketI b = parithmetic_shift_right<2>(e); // floor(e/4);
+    Packet c = preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(padd(b, bias)));  // 2^b
+    Packet out = pmul(pmul(pmul(a, c), c), c);  // a * 2^(3b)
+    c = preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(padd(psub(psub(psub(e, b), b), b), bias)));  // 2^(e-3*b)
+    out = pmul(out, c);
+    return out;
+  }
+};
+
+// Explicitly multiplies 
+//    a * 2^e
+// clamping e to the range
+// [std::numeric_limits<Scalar>::min_exponent-2, std::numeric_limits<Scalar>::max_exponent]
+//
+// This will prematurely over/under-flow if
+//    a is small, e is big
+//    a is big, e is small
+//
+// Assumes IEEE floating point format
+template<typename Packet>
+struct pldexp_clamp_impl {
+  typedef typename unpacket_traits<Packet>::integer_packet PacketI;
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  typedef typename unpacket_traits<PacketI>::type ScalarI;
+  enum {
+    TotalBits = sizeof(Scalar) * CHAR_BIT,
+    MantissaBits = std::numeric_limits<Scalar>::digits - 1,
+    ExponentBits = int(TotalBits) - int(MantissaBits) - 1
+  };
+  
+  static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+  Packet run(const Packet& a, const Packet& exponent) {
+    const ScalarI scalar_bias = (ScalarI(1)<<(int(ExponentBits) - 1)) - ScalarI(1);
+    const PacketI bias = pset1<PacketI>(scalar_bias);   // 127 
+    const ScalarI scalar_limit = (ScalarI(1)<<int(ExponentBits)) - ScalarI(1);
+    const PacketI limit = pset1<PacketI>(scalar_limit); // 255
+    // restrict between 0 and 255
+    const PacketI e = pmin(pmax(padd(pcast<Packet,PacketI>(exponent), bias), pzero(limit)), limit); // e + 127
+    return pmul(a, preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(e)));
+  }
+};
+
+// Naively applies ldexp, assumes exponent is in the valid range of
+// [std::numeric_limits<Scalar>::min_exponent-2, std::numeric_limits<Scalar>::max_exponent]
+//
+// Assumes IEEE floating point format
+template<typename Packet>
+struct pldexp_unsafe_impl {
+  typedef typename unpacket_traits<Packet>::integer_packet PacketI;
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  typedef typename unpacket_traits<PacketI>::type ScalarI;
+  enum {
+    TotalBits = sizeof(Scalar) * CHAR_BIT,
+    MantissaBits = std::numeric_limits<Scalar>::digits - 1,
+    ExponentBits = int(TotalBits) - int(MantissaBits) - 1
+  };
+  
+  static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
+  Packet run(const Packet& a, const Packet& exponent) {
+    const ScalarI scalar_bias = (ScalarI(1)<<(int(ExponentBits) - 1)) - ScalarI(1);
+    const PacketI bias = pset1<PacketI>(scalar_bias);   // 127 
+    // restrict between 0 and 255
+    const PacketI e = padd(pcast<Packet,PacketI>(exponent), bias); // e + 127
+    return pmul(a, preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(e)));
+  }
+};
+
+template<typename Packet> EIGEN_STRONG_INLINE Packet
+pldexp_float(const Packet& a, const Packet& exponent)
+{ return pldexp_fourmul_impl<Packet>::run(a, exponent); }
 
 template<typename Packet> EIGEN_STRONG_INLINE Packet
 pldexp_double(Packet a, Packet exponent)
-{
-  typedef typename unpacket_traits<Packet>::integer_packet PacketI;
-  const Packet cst_1023 = pset1<Packet>(1023.0);
-  // return a * 2^exponent
-  PacketI ei = pcast<Packet,PacketI>(padd(exponent, cst_1023));
-  return pmul(a, preinterpret<Packet>(plogical_shift_left<52>(ei)));
-}
+{ return pldexp_fourmul_impl<Packet>::run(a, exponent); }
 
 // Natural or base 2 logarithm.
 // Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
index a623f54cb17245b612bdc83935a3e587dc74e827..f461eb505cb2b3053f1807dadfe099b94ed9492e 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h
@@ -28,7 +28,7 @@ template<typename Packet> EIGEN_STRONG_INLINE Packet
 pfrexp_double(const Packet& a, Packet& exponent);
 
 template<typename Packet> EIGEN_STRONG_INLINE Packet
-pldexp_float(Packet a, Packet exponent);
+pldexp_float(const Packet& a, const Packet& exponent);
 
 /** \internal \returns log(x) for single precision float */
 template <typename Packet>
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 05d9f8edd69553e7f24b4d9db6e2b64254a28531..72a572a70e7cfbd80ad442fce6ce5886f1da37d4 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -399,21 +399,6 @@ template<> EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, con
 }
 #endif
 
-template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return _mm_cmple_ps(a,b); }
-template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return _mm_cmplt_ps(a,b); }
-template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return _mm_cmpnge_ps(a,b); }
-template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return _mm_cmpeq_ps(a,b); }
-
-template<> EIGEN_STRONG_INLINE Packet2d pcmp_le(const Packet2d& a, const Packet2d& b) { return _mm_cmple_pd(a,b); }
-template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt(const Packet2d& a, const Packet2d& b) { return _mm_cmplt_pd(a,b); }
-template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) { return _mm_cmpnge_pd(a,b); }
-template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); }
-
-template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return _mm_cmplt_epi32(a,b); }
-template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return _mm_cmpeq_epi32(a,b); }
-template<> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { return _mm_cmpeq_epi8(a,b); }
-
-
 template<> EIGEN_STRONG_INLINE Packet4i ptrue<Packet4i>(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); }
 template<> EIGEN_STRONG_INLINE Packet16b ptrue<Packet16b>(const Packet16b& a) { return _mm_cmpeq_epi8(a, a); }
 template<> EIGEN_STRONG_INLINE Packet4f
@@ -447,6 +432,21 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, con
 template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(b,a); }
 template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(b,a); }
 
+template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return _mm_cmple_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return _mm_cmplt_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return _mm_cmpnge_ps(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return _mm_cmpeq_ps(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet2d pcmp_le(const Packet2d& a, const Packet2d& b) { return _mm_cmple_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt(const Packet2d& a, const Packet2d& b) { return _mm_cmplt_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) { return _mm_cmpnge_pd(a,b); }
+template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); }
+
+template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return _mm_cmplt_epi32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return _mm_cmpeq_epi32(a,b); }
+template<> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { return _mm_cmpeq_epi8(a,b); }
+template<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return por(pcmp_lt(a,b), pcmp_eq(a,b)); }
+
 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
 #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
   // There appears to be a bug in GCC, by which the optimizer may
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index b7562e6a1a525d85a7fd9cc23a39b5b30bedb6e1..5dc016b95d8e5b953188c14e4b16daf7a9f9e4b7 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -573,10 +573,20 @@ void packetmath_real() {
     data2[i] = Scalar(internal::random<double>(-1, 1));
   }
   for (int i = 0; i < PacketSize; ++i) {
-    data1[i+PacketSize] = Scalar(internal::random<int>(0, 4));
-    data2[i+PacketSize] = Scalar(internal::random<double>(0, 4));
+    data1[i+PacketSize] = Scalar(internal::random<int>(-4, 4));
+    data2[i+PacketSize] = Scalar(internal::random<double>(-4, 4));
   }
   CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
+  if (PacketTraits::HasExp) {
+    data1[0] = Scalar(-1);
+    data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::min_exponent-10);
+    CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
+    data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::max_exponent+10);
+    CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
+    data1[0] = NumTraits<Scalar>::quiet_NaN();
+    CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
+    VERIFY((numext::isnan)(data2[0]));
+  }
 
   for (int i = 0; i < size; ++i) {
     data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6)));