From e5d0078ed3884aede57c94f7fbbf4b519653fe24 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Fri, 7 Nov 2025 19:54:49 +0800 Subject: [PATCH] Use more FMA in reciprocal iteration for precision Typical Newton iteration for reciprocal is mul(x, fma(-a, x, 2)), which can also be written as fma(x, fma(-a, x, 1), x). Here two level of fma operations improves overall precision. --- Eigen/src/Core/MathFunctionsImpl.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index c4b5da3cc..43d9d646a 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -37,15 +37,16 @@ struct generic_reciprocal_newton_step { static_assert(Steps > 0, "Steps must be at least 1."); EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_a_recip) { using Scalar = typename unpacket_traits::type; - const Packet two = pset1(Scalar(2)); + const Packet one = pset1(Scalar(1)); // Refine the approximation using one Newton-Raphson step: // x_{i} = x_{i-1} * (2 - a * x_{i-1}) const Packet x = generic_reciprocal_newton_step::run(a, approx_a_recip); - const Packet tmp = pnmadd(a, x, two); + const Packet tmp = pnmadd(a, x, one); // If tmp is NaN, it means that a is either +/-0 or +/-Inf. // In this case return the approximation directly. const Packet is_not_nan = pcmp_eq(tmp, tmp); - return pselect(is_not_nan, pmul(x, tmp), x); + // Use two FMAs instead of FMA+FMUL to improve precision. + return pselect(is_not_nan, pmadd(x, tmp, x), x); } }; -- GitLab