From b0c24d07daec1ff7f99a481663eeb6b2f0e80823 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Tue, 14 Jul 2020 11:49:15 -0700 Subject: [PATCH] Fix NEON min/max for nan values. The NEON intrinsics `vmin/vmax` always return `nan` when either operand is `nan`, rather than returning the first argument as the standard specifies for `std::min/std::max`. To get around this, we use a combination of greater-than/less-than (which do adhere to the standard for comparing values against `nan`) and select. Tested: ``` aarch64-linux-gnu-g++ -static -I./ -Iunsupported/ -Itest/ unsupported/test/cxx11_tensor_expr.cpp -o cxx11_tensor_expr adb push cxx11_tensor_expr /data/local/tmp/ adb shell /data/local/tmp/cxx11_tensor_expr ``` Fixes #1937 --- Eigen/src/Core/arch/NEON/PacketMath.h | 42 +++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 2c4b5bfff..80ba56ac8 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -1135,8 +1135,18 @@ template<> EIGEN_STRONG_INLINE Packet2ui pabsdiff(const Packet2ui& a, template<> EIGEN_STRONG_INLINE Packet4ui pabsdiff(const Packet4ui& a, const Packet4ui& b) { return vabdq_u32(a,b); } -template<> EIGEN_STRONG_INLINE Packet2f pmin(const Packet2f& a, const Packet2f& b) { return vmin_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return vminq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet2f pmin(const Packet2f& a, const Packet2f& b) +{ + // return vmin_f32(a,b); + // Special handling of nan. + return vbsl_f32(vcgt_f32(a, b), b, a); +} +template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) +{ + // return vminq_f32(a,b); + // Special handling of nan. 
+ return vbslq_f32(vcgtq_f32(a, b), b, a); +} template<> EIGEN_STRONG_INLINE Packet4c pmin(const Packet4c& a, const Packet4c& b) { return vget_lane_s32(vreinterpret_s32_s8(vmin_s8( @@ -1172,8 +1182,18 @@ template<> EIGEN_STRONG_INLINE Packet2ul pmin(const Packet2ul& a, con vdup_n_u64((std::min)(vgetq_lane_u64(a, 1), vgetq_lane_u64(b, 1)))); } -template<> EIGEN_STRONG_INLINE Packet2f pmax(const Packet2f& a, const Packet2f& b) { return vmax_f32(a,b); } -template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return vmaxq_f32(a,b); } +template<> EIGEN_STRONG_INLINE Packet2f pmax(const Packet2f& a, const Packet2f& b) +{ + // return vmax_f32(a,b); + // Special handling of nan. + return vbsl_f32(vclt_f32(a, b), b, a); +} +template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) +{ + // return vmaxq_f32(a,b); + // Special handling of nan. + return vbslq_f32(vcltq_f32(a, b), b, a); +} template<> EIGEN_STRONG_INLINE Packet4c pmax(const Packet4c& a, const Packet4c& b) { return vget_lane_s32(vreinterpret_s32_s8(vmax_s8( @@ -3325,9 +3345,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& { return vmlaq_f64(c,a,b); } #endif -template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { return vminq_f64(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) +{ + // return vminq_f64(a,b); + // Special handling of nan. + return vbslq_f64(vcgtq_f64(a, b), b, a); +} -template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) { return vmaxq_f64(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) +{ + // return vmaxq_f64(a,b); + // Special handling of nan. + return vbslq_f64(vcltq_f64(a, b), b, a); +} template<> EIGEN_STRONG_INLINE Packet2d pfloor(const Packet2d& a) { -- GitLab