From 7ba0dc0d731b2867192c90e27b857ba983c725ca Mon Sep 17 00:00:00 2001 From: guoqiangqi <425418567@qq.com> Date: Tue, 26 Jan 2021 15:42:45 +0800 Subject: [PATCH 1/6] Supplement related apis for int32/float32 packed types in SVE module. --- Eigen/src/Core/arch/SVE/PacketMath.h | 49 ++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h index 4877b6d80..5e20631aa 100644 --- a/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/Eigen/src/Core/arch/SVE/PacketMath.h @@ -108,6 +108,12 @@ EIGEN_STRONG_INLINE PacketXi psub(const PacketXi& a, const PacketXi& b return svsub_s32_z(svptrue_b32(), a, b); } +template <> +EIGEN_STRONG_INLINE PacketXi pabsdiff(const PacketXi& a, const PacketXi& b) +{ + return svabd_s32_z(svptrue_b32(), a, b); +} + template <> EIGEN_STRONG_INLINE PacketXi pnegate(const PacketXi& a) { @@ -286,6 +292,13 @@ EIGEN_STRONG_INLINE numext::int32_t pfirst(const PacketXi& a) return svlasta_s32(svpfalse_b(), a); } +template <> +EIGEN_STRONG_INLINE PacketXi pselect(const PacketXi& mask, const PacketXi& a, const PacketXi& b) +{ + return svbsl(mask, a, b); +} + + template <> EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a) { @@ -424,6 +437,12 @@ struct unpacket_traits { }; }; +template <> +EIGEN_STRONG_INLINE void prefetch(const float* addr) +{ + svprfw(svptrue_b32(), addr, SV_PLDL1KEEP); +} + template <> EIGEN_STRONG_INLINE PacketXf pset1(const float& from) { @@ -456,6 +475,18 @@ EIGEN_STRONG_INLINE PacketXf psub(const PacketXf& a, const PacketXf& b return svsub_f32_z(svptrue_b32(), a, b); } +template <> +EIGEN_STRONG_INLINE PacketXf paddsub(const PacketXf& a, const PacketXf& b) +{ + return avadd_f32_x(svpture_b32(), a, svneg_f32_m(b, svdupq_n_b32(1,0,1,0), b)); +} + +template <> +EIGEN_STRONG_INLINE PacketXf pabsdiff(const PacketXf& a, const PacketXf& b) +{ + return svabd_f32_z(svptrue_b32(), a, b); +} + template <> EIGEN_STRONG_INLINE PacketXf pnegate(const PacketXf& a) { @@ -480,6 +511,12 @@ EIGEN_STRONG_INLINE PacketXf pdiv(const PacketXf& a, const PacketXf& b return svdiv_f32_z(svptrue_b32(), a, b); } +template <> +EIGEN_STRONG_INLIN PacketXf psqrt(const PacketXf& a) +{ + return svsqrt_f32_z(svptrue_b32(), a); +} + template <> EIGEN_STRONG_INLINE PacketXf pmadd(const PacketXf& a, const PacketXf& b, const PacketXf& c) { @@ -652,6 +689,12 @@ EIGEN_STRONG_INLINE float pfirst(const PacketXf& a) return svlasta_f32(svpfalse_b(), a); } +template <> +EIGEN_STRONG_INLINE PacketXf pselect(const PacketXf& mask, const PacketXf& a, const PacketXf& b) +{ + return svreinterpret_f32(svbsl(svreinterpret_u32(mask), svreinterpret_u32(a), svreinterpret_u32(b))); +} + template <> EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a) { @@ -727,6 +770,12 @@ EIGEN_STRONG_INLINE float predux_max(const PacketXf& a) return svmaxv_f32(svptrue_b32(), a); } +template <> +EIGEN_STRONG_INLINE bool predux_any(const PacketXf& a) +{ + svptest_any(svptrue_b32(), svcmpne_n_f32(svpture_b32(), a, 0.0f)); +} + template EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& kernel) { -- GitLab From 6bd36328417cb70cab4f855e4be6bab899bf003a Mon Sep 17 00:00:00 2001 From: guoqiangqi <425418567@qq.com> Date: Tue, 26 Jan 2021 17:28:24 +0800 Subject: [PATCH 2/6] Guard pselect impl with __ARM_FEATURE_SVE2 since svbsl was only supported by sve2 --- Eigen/src/Core/arch/SVE/PacketMath.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h index 5e20631aa..546f5af3e 100644 --- a/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/Eigen/src/Core/arch/SVE/PacketMath.h @@ -292,12 +292,13 @@ EIGEN_STRONG_INLINE numext::int32_t pfirst(const PacketXi& a) return svlasta_s32(svpfalse_b(), a); } +#if __ARM_FEATURE_SVE2 template <> EIGEN_STRONG_INLINE PacketXi pselect(const PacketXi& mask, const PacketXi& a, const PacketXi& b) { return svbsl(mask, a, b); } - +#endif template <> EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a) @@ -478,11 +479,11 @@ EIGEN_STRONG_INLINE PacketXf psub(const PacketXf& a, const PacketXf& b template <> EIGEN_STRONG_INLINE PacketXf paddsub(const PacketXf& a, const PacketXf& b) { - return avadd_f32_x(svpture_b32(), a, svneg_f32_m(b, svdupq_n_b32(1,0,1,0), b)); + return svadd_f32_x(svptrue_b32(), a, svneg_f32_m(b, svdupq_n_b32(1,0,1,0), b)); } template <> -EIGEN_STRONG_INLINE PacketXf pabsdiff(const PacketXf& a, const PacketXf& b) +EIGEN_STRONG_INLINE PacketXf pabsdiff(const PacketXf& a, const PacketXf& b) { return svabd_f32_z(svptrue_b32(), a, b); } @@ -512,7 +513,7 @@ EIGEN_STRONG_INLINE PacketXf pdiv(const PacketXf& a, const PacketXf& b } template <> -EIGEN_STRONG_INLIN PacketXf psqrt(const PacketXf& a) +EIGEN_STRONG_INLINE PacketXf psqrt(const PacketXf& a) { return svsqrt_f32_z(svptrue_b32(), a); } @@ -689,11 +690,13 @@ EIGEN_STRONG_INLINE float pfirst(const PacketXf& a) return svlasta_f32(svpfalse_b(), a); } +#if __ARM_FEATURE_SVE2 template <> EIGEN_STRONG_INLINE PacketXf pselect(const PacketXf& mask, const PacketXf& a, const PacketXf& b) { return svreinterpret_f32(svbsl(svreinterpret_u32(mask), svreinterpret_u32(a), svreinterpret_u32(b))); } +#endif template <> EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a) @@ -773,7 +776,7 @@ EIGEN_STRONG_INLINE float predux_max(const PacketXf& a) template <> EIGEN_STRONG_INLINE bool predux_any(const PacketXf& a) { - svptest_any(svptrue_b32(), svcmpne_n_f32(svpture_b32(), a, 0.0f)); + return svptest_any(svptrue_b32(), svcmpne_n_f32(svptrue_b32(), a, 0.0f)); } template -- GitLab From 8753fe5ccfbac682f766fd804a9b470530d0fe38 Mon Sep 17 00:00:00 2001 From: guoqiangqi <425418567@qq.com> Date: Tue, 26 Jan 2021 20:27:14 +0800 Subject: [PATCH 3/6] Add pselect sve impl --- Eigen/src/Core/arch/SVE/PacketMath.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h index 546f5af3e..61a589203 100644 --- a/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/Eigen/src/Core/arch/SVE/PacketMath.h @@ -292,13 +292,16 @@ EIGEN_STRONG_INLINE numext::int32_t pfirst(const PacketXi& a) return svlasta_s32(svpfalse_b(), a); } -#if __ARM_FEATURE_SVE2 template <> EIGEN_STRONG_INLINE PacketXi pselect(const PacketXi& mask, const PacketXi& a, const PacketXi& b) { - return svbsl(mask, a, b); -} +#if __ARM_FEATURE_SVE2 + return svbsl(a, b, mask); +#else + PacketXi mask_inv = svnot_s32_z(svptrue_b32(), mask); + return svorr_s32_z(svptrue_b32(), svand_s32_z(svptrue_b32(), a, mask), svand_s32_z(svptrue_b32(), b, mask_inv)); #endif +} template <> EIGEN_STRONG_INLINE PacketXi preverse(const PacketXi& a) @@ -690,13 +693,21 @@ EIGEN_STRONG_INLINE float pfirst(const PacketXf& a) return svlasta_f32(svpfalse_b(), a); } -#if __ARM_FEATURE_SVE2 + template <> EIGEN_STRONG_INLINE PacketXf pselect(const PacketXf& mask, const PacketXf& a, const PacketXf& b) { - return svreinterpret_f32(svbsl(svreinterpret_u32(mask), svreinterpret_u32(a), svreinterpret_u32(b))); -} +#if __ARM_FEATURE_SVE2 + return svreinterpret_f32(svbsl(svreinterpret_u32_f32(a), svreinterpret_u32_f32(b), svreinterpret_u32_f32(mask))); +#else + svuint32_t mask_ = svreinterpret_u32_f32(mask); + svuint32_t mask_inv_ = svnot_u32_z(svptrue_b32(), mask_); + svuint32_t a_ = svand_u32_z(svptrue_b32(), svreinterpret_u32_f32(a), mask_); + svuint32_t b_ = svand_u32_z(svptrue_b32(), svreinterpret_u32_f32(b), mask_inv_); + return svreinterpret_f32_u32(svorr_u32_z(svptrue_b32(), a_, b_)); #endif +} + template <> EIGEN_STRONG_INLINE PacketXf preverse(const PacketXf& a) -- GitLab From 1d86bf0d9011d938e97db63ae1bc059101831cb2 Mon Sep 17 00:00:00 2001 From: guoqiangqi <425418567@qq.com> Date: Wed, 27 Jan 2021 14:30:20 +0800 Subject: [PATCH 4/6] Using svls*_n instead of svls* . --- Eigen/src/Core/arch/SVE/PacketMath.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h index 61a589203..5a0c847f0 100644 --- a/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/Eigen/src/Core/arch/SVE/PacketMath.h @@ -219,13 +219,13 @@ EIGEN_STRONG_INLINE PacketXi parithmetic_shift_right(PacketXi a) template EIGEN_STRONG_INLINE PacketXi plogical_shift_right(PacketXi a) { - return svreinterpret_s32_u32(svlsr_u32_z(svptrue_b32(), svreinterpret_u32_s32(a), svdup_n_u32_z(svptrue_b32(), N))); + return svreinterpret_s32_u32(svlsr_n_u32_z(svptrue_b32(), svreinterpret_u32_s32(a), N)); } template EIGEN_STRONG_INLINE PacketXi plogical_shift_left(PacketXi a) { - return svlsl_s32_z(svptrue_b32(), a, svdup_n_u32_z(svptrue_b32(), N)); + return svlsl_n_s32_z(svptrue_b32(), a, N); } template <> -- GitLab From e24b9cd20ac8ed41998b5441e28955fcb30e6e99 Mon Sep 17 00:00:00 2001 From: guoqiangqi <425418567@qq.com> Date: Sat, 30 Jan 2021 10:58:15 +0800 Subject: [PATCH 5/6] fix typo in initial code --- Eigen/src/Core/arch/SVE/PacketMath.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h index 5a0c847f0..f250c96e4 100644 --- a/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/Eigen/src/Core/arch/SVE/PacketMath.h @@ -159,7 +159,7 @@ EIGEN_STRONG_INLINE PacketXi pmax(const PacketXi& a, const PacketXi& b template <> EIGEN_STRONG_INLINE PacketXi pcmp_le(const PacketXi& a, const PacketXi& b) { - return svdup_n_s32_z(svcmplt_s32(svptrue_b32(), a, b), 0xffffffffu); + return svdup_n_s32_z(svcmple_s32(svptrue_b32(), a, b), 0xffffffffu); } template <> @@ -568,7 +568,7 @@ EIGEN_STRONG_INLINE PacketXf pmax(const PacketXf& a, template <> EIGEN_STRONG_INLINE PacketXf pcmp_le(const PacketXf& a, const PacketXf& b) { - return svreinterpret_f32_u32(svdup_n_u32_z(svcmplt_f32(svptrue_b32(), a, b), 0xffffffffu)); + return svreinterpret_f32_u32(svdup_n_u32_z(svcmple_f32(svptrue_b32(), a, b), 0xffffffffu)); } template <> -- GitLab From 5c3904b9f633db4e3f31c18c2d97b68fdc822672 Mon Sep 17 00:00:00 2001 From: guoqiangqi <425418567@qq.com> Date: Wed, 31 Mar 2021 09:52:12 +0800 Subject: [PATCH 6/6] Replace '#if (...)' with '#if defined(...)' to keep it from the warning on using an undefined macro. --- Eigen/src/Core/arch/SVE/PacketMath.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/arch/SVE/PacketMath.h b/Eigen/src/Core/arch/SVE/PacketMath.h index f250c96e4..5b7bb262e 100644 --- a/Eigen/src/Core/arch/SVE/PacketMath.h +++ b/Eigen/src/Core/arch/SVE/PacketMath.h @@ -295,7 +295,7 @@ EIGEN_STRONG_INLINE numext::int32_t pfirst(const PacketXi& a) template <> EIGEN_STRONG_INLINE PacketXi pselect(const PacketXi& mask, const PacketXi& a, const PacketXi& b) { -#if __ARM_FEATURE_SVE2 +#if defined(__ARM_FEATURE_SVE2) return svbsl(a, b, mask); #else PacketXi mask_inv = svnot_s32_z(svptrue_b32(), mask); @@ -697,7 +697,7 @@ EIGEN_STRONG_INLINE float pfirst(const PacketXf& a) template <> EIGEN_STRONG_INLINE PacketXf pselect(const PacketXf& mask, const PacketXf& a, const PacketXf& b) { -#if __ARM_FEATURE_SVE2 +#if defined(__ARM_FEATURE_SVE2) return svreinterpret_f32(svbsl(svreinterpret_u32_f32(a), svreinterpret_u32_f32(b), svreinterpret_u32_f32(mask))); #else svuint32_t mask_ = svreinterpret_u32_f32(mask); -- GitLab