diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 155fdad20ee237407f805e8717288475d19f655f..5e36ce84d4324c517cef8119fe0db469f4d4331a 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -1004,8 +1004,7 @@ struct madd_impl { } }; -// Use FMA if there is a single CPU instruction. -#ifdef EIGEN_VECTORIZE_FMA +#if EIGEN_SCALAR_MADD_USE_FMA template struct madd_impl::value>> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& x, const Scalar& y, const Scalar& z) { @@ -1927,7 +1926,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar arithmetic_shift_right(const Scalar return bit_cast(bit_cast(a) >> n); } -// Otherwise, rely on template implementation. template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar fma(const Scalar& x, const Scalar& y, const Scalar& z) { return internal::fma_impl::run(x, y, z); diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index db4a63089b08e4172978cff0d97cc55ed02ee4c5..dad36716906a691278244b20afae606933da15df 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -52,6 +52,26 @@ #define EIGEN_STACK_ALLOCATION_LIMIT 131072 #endif +/* Specify whether to use std::fma for scalar multiply-add instructions. + * + * On machines that have FMA as a single instruction, this will generally + * improve precision without significant performance implications. + * + * Without a single instruction, performance has been found to be reduced 2-3x + * on Intel CPUs, and up to 30x for WASM. + * + * If unspecified, defaults to using FMA if hardware support is available. + * The default should be used in most cases to ensure consistency between + * vectorized and non-vectorized paths. + */ +#ifndef EIGEN_SCALAR_MADD_USE_FMA +#ifdef EIGEN_VECTORIZE_FMA +#define EIGEN_SCALAR_MADD_USE_FMA 1 +#else +#define EIGEN_SCALAR_MADD_USE_FMA 0 +#endif +#endif + //------------------------------------------------------------------------------------------ // Compiler identification, EIGEN_COMP_* //------------------------------------------------------------------------------------------ diff --git a/doc/PreprocessorDirectives.dox b/doc/PreprocessorDirectives.dox index 27ae531f69d5f30925746b3ebf85e9cbc9028efb..f4af9074464393e0f3d766cd2347e58058e2ef79 100644 --- a/doc/PreprocessorDirectives.dox +++ b/doc/PreprocessorDirectives.dox @@ -18,9 +18,6 @@ one option, and other parts (or libraries that you use) are compiled with anothe fail to link or exhibit subtle bugs. Nevertheless, these options can be useful for people who know what they are doing. - - \b EIGEN2_SUPPORT and \b EIGEN2_SUPPORT_STAGEnn_xxx are disabled starting from the 3.3 release. - Defining one of these will raise a compile-error. If you need to compile Eigen2 code, - check this site. - \b EIGEN_DEFAULT_DENSE_INDEX_TYPE - the type for column and row indices in matrices, vectors and array (DenseBase::Index). Set to \c std::ptrdiff_t by default. - \b EIGEN_DEFAULT_IO_FORMAT - the IOFormat to use when printing a matrix if no %IOFormat is specified. @@ -44,7 +41,7 @@ are doing. preferable. Not defined by default. \warning See the documentation of \c EIGEN_INITIALIZE_MATRICES_BY_ZERO for a discussion on a limitations of these macros when applied to \c 1x1, \c 1x2, and \c 2x1 fixed-size matrices. - - \b EIGEN_NO_AUTOMATIC_RESIZING - if defined, the matrices (or arrays) on both sides of an assignment + - \b EIGEN_NO_AUTOMATIC_RESIZING - if defined, the matrices (or arrays) on both sides of an assignment a = b have to be of the same size; otherwise, %Eigen automatically resizes \c a so that it is of the correct size. Not defined by default. @@ -72,8 +69,8 @@ The %Eigen library contains many assertions to guard against programming errors, run time. However, these assertions do cost time and can thus be turned off. - \b EIGEN_NO_DEBUG - disables %Eigen's assertions if defined. Not defined by default, unless the - \c NDEBUG macro is defined (this is a standard C++ macro which disables all asserts). - - \b EIGEN_NO_STATIC_ASSERT - if defined, compile-time static assertions are replaced by runtime assertions; + \c NDEBUG macro is defined (this is a standard C++ macro which disables all asserts). + - \b EIGEN_NO_STATIC_ASSERT - if defined, compile-time static assertions are replaced by runtime assertions; this saves compilation time. Not defined by default. - \b eigen_assert - macro with one argument that is used inside %Eigen for assertions. By default, it is basically defined to be \c assert, which aborts the program if the assertion is violated. Redefine this @@ -90,7 +87,7 @@ run time. However, these assertions do cost time and can thus be turned off. Let us emphasize that \c EIGEN_MAX_*_ALIGN_BYTES define only a desirable upper bound. In practice data is aligned to largest power-of-two common divisor of \c EIGEN_MAX_STATIC_ALIGN_BYTES and the size of the data, such that memory is not wasted. - \b \c EIGEN_DONT_PARALLELIZE - if defined, this disables multi-threading. This is only relevant if you enabled OpenMP. See \ref TopicMultiThreading for details. - - \b \c EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless + - \b \c EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN. - \b \c EIGEN_UNALIGNED_VECTORIZE - disables/enables vectorization with unaligned stores. Default is 1 (enabled). If set to 0 (disabled), then expression for which the destination cannot be aligned are not vectorized (e.g., unaligned