From 2e1cc4aa018d1a4030cd99579508278b05bde406 Mon Sep 17 00:00:00 2001 From: guoqiangqi <425418567@qq.com> Date: Thu, 25 Feb 2021 14:38:00 +0800 Subject: [PATCH 1/3] Using Eigen's vectorized sin and cos to speed up the calculation of twiddles in ei_kissfft_impl.h --- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 74 +++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 430953aee..7460fbf47 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -29,6 +29,12 @@ struct kiss_cpx_fft { using numext::sin; using numext::cos; + typedef typename packet_traits::type PacketType; + int psize = unpacket_traits::size; + Scalar* arr = new Scalar[psize]; + Scalar* arr_c = new Scalar[psize]; + Scalar* arr_s = new Scalar[psize]; + m_inverse = inverse; m_twiddles.resize(nfft); double phinc = 0.25 * double(EIGEN_PI) / nfft; @@ -37,6 +43,22 @@ struct kiss_cpx_fft if ((nfft&1)==0) m_twiddles[nfft/2] = Complex(Scalar(-1), Scalar(0)); int i=1; + + for (;(i+psize-1)*8(arr); + PacketType c = pcos(data); + PacketType s = psin(data); + pstore(arr_s, s); + pstore(arr_c, c); + for(int j=0;j(arr); + PacketType c = pcos(data); + PacketType s = psin(data); + pstore(arr_s, s); + pstore(arr_c, c); + for(int j=0;j(arr); + PacketType c = pcos(data); + PacketType s = psin(data); + pstore(arr_s, s); + pstore(arr_c, c); + for(int j=0;j(arr); + PacketType c = pcos(data); + PacketType s = psin(data); + pstore(arr_s, s); + pstore(arr_c, c); + for(int j=0;j Date: Thu, 25 Feb 2021 15:05:56 +0800 Subject: [PATCH 2/3] Fix segmentation fault on x64 --- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 24 ++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 7460fbf47..d0e8fe003 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -49,11 +49,11 @@ struct kiss_cpx_fft for(int j=0;j(arr); + PacketType data = ploadu(arr); PacketType c = pcos(data); PacketType s = psin(data); - pstore(arr_s, s); - pstore(arr_c, c); + pstoreu(arr_s, s); + pstoreu(arr_c, c); for(int j=0;j(arr); + PacketType data = ploadu(arr); PacketType c = pcos(data); PacketType s = psin(data); - pstore(arr_s, s); - pstore(arr_c, c); + pstoreu(arr_s, s); + pstoreu(arr_c, c); for(int j=0;j(arr); + PacketType data = ploadu(arr); PacketType c = pcos(data); PacketType s = psin(data); - pstore(arr_s, s); - pstore(arr_c, c); + pstoreu(arr_s, s); + pstoreu(arr_c, c); for(int j=0;j(arr); + PacketType data = ploadu(arr); PacketType c = pcos(data); PacketType s = psin(data); - pstore(arr_s, s); - pstore(arr_c, c); + pstoreu(arr_s, s); + pstoreu(arr_c, c); for(int j=0;j Date: Fri, 26 Feb 2021 09:27:07 +0800 Subject: [PATCH 3/3] Allocate the arrays on stack instead of heap --- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index d0e8fe003..e2df9641c 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -31,9 +31,9 @@ struct kiss_cpx_fft using numext::cos; typedef typename packet_traits::type PacketType; int psize = unpacket_traits::size; - Scalar* arr = new Scalar[psize]; - Scalar* arr_c = new Scalar[psize]; - Scalar* arr_s = new Scalar[psize]; + Scalar arr[unpacket_traits::size]; + Scalar arr_s[unpacket_traits::size]; + Scalar arr_c[unpacket_traits::size]; m_inverse = inverse; m_twiddles.resize(nfft); @@ -136,9 +136,6 @@ struct kiss_cpx_fft m_twiddles[nfft-i] = Complex(-c, -s*flip); } - delete[] arr; - delete[] arr_s; - delete[] arr_c; } void factorize(int nfft) -- GitLab