From 1c88c2e0e5a77e220add168ef84dd7bcca2521f1 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Fri, 9 Sep 2022 23:09:24 -0700 Subject: [PATCH] Add DUCC FFT support. DUCC is the successor to PocketFFT (same author), and is used by scipy and Google JAX. It is now the recommended library according to the PocketFFT author. We're looking into using DUCC for TensorFlow and XLA as well. --- unsupported/Eigen/FFT | 20 +++-- unsupported/Eigen/src/FFT/ei_duccfft_impl.h | 73 +++++++++++++++++++ unsupported/Eigen/src/FFT/ei_pocketfft_impl.h | 21 +++--- unsupported/test/CMakeLists.txt | 18 +++++ unsupported/test/duccfft.cpp | 3 + unsupported/test/fft_test_shared.h | 6 +- 6 files changed, 121 insertions(+), 20 deletions(-) create mode 100644 unsupported/Eigen/src/FFT/ei_duccfft_impl.h create mode 100644 unsupported/test/duccfft.cpp diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index b929e842f..b339fd289 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -29,19 +29,20 @@ * The default implementation is based on kissfft. It is a small, free, and * reasonably efficient default. * - * There are currently four implementation backend: + * There are currently four implementation backends: * * - kissfft(https://github.com/mborgerding/kissfft) : Simple and not so fast, BSD-3-Clause. * It is a mixed-radix Fast Fourier Transform based up on the principle, "Keep It Simple, Stupid." * Notice that:kissfft fails to handle "atypically-sized" inputs(i.e., sizes with large factors),a workaround is using fftw or pocketfft. * - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size. * - MKL (https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html) : fastest, free -- may be incompatible with Eigen in GPL form. - * - pocketfft (https://gitlab.mpcdf.mpg.de/mtr/pocketfft) : faster than kissfft, BSD 3-clause. + * - PocketFFT/DUCC (https://gitlab.mpcdf.mpg.de/mtr/pocketfft, https://gitlab.mpcdf.mpg.de/mtr/ducc) : faster than kissfft, BSD 3-clause. * It is a heavily modified implementation of FFTPack, with the following advantages: * 1.strictly C++11 compliant * 2.more accurate twiddle factor computation * 3.very fast plan generation * 4.worst case complexity for transform sizes with large prime factors is N*log(N), because Bluestein's algorithm is used for these cases + * According to the author, DUCC contains the "evolution" of pocketfft, though the interface is very similar. * * \section FFTDesign Design * @@ -95,12 +96,19 @@ } #elif defined EIGEN_POCKETFFT_DEFAULT // internal::pocketfft_impl: a heavily modified implementation of FFTPack, with many advantages. -# include -# include"src/FFT/ei_pocketfft_impl.h" +# include +# include "src/FFT/ei_pocketfft_impl.h" namespace Eigen { template struct default_fft_impl : public internal::pocketfft_impl {}; } +#elif defined EIGEN_DUCCFFT_DEFAULT +# include +# include "src/FFT/ei_duccfft_impl.h" + namespace Eigen { + template + struct default_fft_impl : public internal::duccfft_impl {}; + } #else // internal::kissfft_impl: small, free, reasonably efficient default, derived from kissfft # include "src/FFT/ei_kissfft_impl.h" @@ -210,7 +218,7 @@ class FFT m_impl.fwd(dst,src,static_cast(nfft)); } -#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT +#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_DUCCFFT_DEFAULT || defined EIGEN_MKL_DEFAULT inline void fwd2(Complex * dst, const Complex * src, int n0,int n1) { @@ -369,7 +377,7 @@ class FFT } -#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT +#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_DUCCFFT_DEFAULT || defined EIGEN_MKL_DEFAULT inline void inv2(Complex * dst, const Complex * src, int n0,int n1) { diff --git a/unsupported/Eigen/src/FFT/ei_duccfft_impl.h b/unsupported/Eigen/src/FFT/ei_duccfft_impl.h new file mode 100644 index 000000000..35afdcbf8 --- /dev/null +++ b/unsupported/Eigen/src/FFT/ei_duccfft_impl.h @@ -0,0 +1,73 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +namespace Eigen { + +namespace internal { + +template +struct duccfft_impl +{ + using Scalar = _Scalar; + using Complex = std::complex; + using shape_t = ducc0::fmav_info::shape_t; + using stride_t = ducc0::fmav_info::stride_t; + + inline void clear() {} + + inline void fwd(Complex* dst, const Scalar* src, int nfft){ + const shape_t axes { 0 }; + ducc0::cfmav m_in(src, shape_t{static_cast(nfft)}); + ducc0::vfmav m_out(dst, shape_t{static_cast(nfft) / 2 + 1}); + ducc0::r2c(m_in, m_out, axes, /*forward=*/true, /*scale=*/static_cast(1)); + } + + inline void fwd(Complex* dst, const Complex* src, int nfft){ + const shape_t axes { 0 }; + ducc0::cfmav m_in(src, shape_t{static_cast(nfft)}); + ducc0::vfmav m_out(dst, shape_t{static_cast(nfft)}); + ducc0::c2c(m_in, m_out, axes, /*forward=*/true, /*scale=*/static_cast(1)); + } + + inline void inv(Scalar* dst, const Complex* src, int nfft){ + const shape_t axes { 0 }; + ducc0::cfmav m_in(src, shape_t{static_cast(nfft) / 2 + 1}); + ducc0::vfmav m_out(dst, shape_t{static_cast(nfft)}); + ducc0::c2r(m_in, m_out, axes, /*forward=*/false, /*scale=*/static_cast(1)); + } + + inline void inv(Complex* dst, const Complex* src, int nfft){ + const shape_t axes { 0 }; + ducc0::cfmav m_in(src, shape_t{static_cast(nfft)}); + ducc0::vfmav m_out(dst, shape_t{static_cast(nfft)}); + ducc0::c2c(m_in, m_out, axes, /*forward=*/false, /*scale=*/static_cast(1)); + } + + inline void fwd2(Complex* dst, const Complex* src, int nfft0, int nfft1){ + const shape_t axes { 0, 1 }; + const shape_t in_shape { static_cast(nfft0), static_cast(nfft1) }; + const shape_t out_shape { static_cast(nfft0), static_cast(nfft1) }; + const stride_t stride { static_cast(nfft1), static_cast(1) }; + ducc0::cfmav m_in(src, in_shape, stride); + ducc0::vfmav m_out(dst, out_shape, stride); + ducc0::c2c(m_in, m_out, axes, /*forward=*/true, /*scale=*/static_cast(1)); + } + + inline void inv2(Complex* dst, const Complex* src, int nfft0, int nfft1){ + const shape_t axes { 0, 1 }; + const shape_t in_shape { static_cast(nfft0), static_cast(nfft1) }; + const shape_t out_shape { static_cast(nfft0), static_cast(nfft1) }; + const stride_t stride { static_cast(nfft1), static_cast(1) }; + ducc0::cfmav m_in(src, in_shape, stride); + ducc0::vfmav m_out(dst, out_shape, stride); + ducc0::c2c(m_in, m_out, axes, /*forward=*/false, /*scale=*/static_cast(1)); + } +}; + +} // namespace internal +} // namespace Eigen diff --git a/unsupported/Eigen/src/FFT/ei_pocketfft_impl.h b/unsupported/Eigen/src/FFT/ei_pocketfft_impl.h index f2da890c7..9f05502f5 100644 --- a/unsupported/Eigen/src/FFT/ei_pocketfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_pocketfft_impl.h @@ -5,9 +5,6 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -using namespace pocketfft; -using namespace pocketfft::detail; - namespace Eigen { namespace internal { @@ -15,8 +12,10 @@ namespace internal { template struct pocketfft_impl { - typedef _Scalar Scalar; - typedef std::complex Complex; + using Scalar = _Scalar; + using Complex = std::complex; + using shape_t = pocketfft::shape_t; + using stride_t = pocketfft::stride_t; inline void clear() {} @@ -25,14 +24,14 @@ struct pocketfft_impl const shape_t axes_{ 0 }; const stride_t stride_in{ sizeof(Scalar) }; const stride_t stride_out{ sizeof(Complex) }; - r2c(shape_, stride_in, stride_out, axes_, FORWARD, src, dst, static_cast(1)); + pocketfft::r2c(shape_, stride_in, stride_out, axes_, pocketfft::FORWARD, src, dst, static_cast(1)); } inline void fwd(Complex* dst, const Complex* src, int nfft){ const shape_t shape_{ static_cast(nfft) }; const shape_t axes_{ 0 }; const stride_t stride_{ sizeof(Complex) }; - c2c(shape_, stride_, stride_, axes_, FORWARD, src, dst, static_cast(1)); + pocketfft::c2c(shape_, stride_, stride_, axes_, pocketfft::FORWARD, src, dst, static_cast(1)); } inline void inv(Scalar* dst, const Complex* src, int nfft){ @@ -40,28 +39,28 @@ struct pocketfft_impl const shape_t axes_{ 0 }; const stride_t stride_in{ sizeof(Complex) }; const stride_t stride_out{ sizeof(Scalar) }; - c2r(shape_, stride_in, stride_out, axes_, BACKWARD, src, dst, static_cast(1)); + pocketfft::c2r(shape_, stride_in, stride_out, axes_, pocketfft::BACKWARD, src, dst, static_cast(1)); } inline void inv(Complex* dst, const Complex* src, int nfft){ const shape_t shape_{ static_cast(nfft) }; const shape_t axes_{ 0 }; const stride_t stride_{ sizeof(Complex) }; - c2c(shape_, stride_, stride_, axes_, BACKWARD, src, dst, static_cast(1)); + pocketfft::c2c(shape_, stride_, stride_, axes_, pocketfft::BACKWARD, src, dst, static_cast(1)); } inline void fwd2(Complex* dst, const Complex* src, int nfft0, int nfft1){ const shape_t shape_{ static_cast(nfft0), static_cast(nfft1) }; const shape_t axes_{ 0, 1 }; const stride_t stride_{ static_cast(sizeof(Complex)*nfft1), static_cast(sizeof(Complex)) }; - c2c(shape_, stride_, stride_, axes_, FORWARD, src, dst, static_cast(1)); + pocketfft::c2c(shape_, stride_, stride_, axes_, pocketfft::FORWARD, src, dst, static_cast(1)); } inline void inv2(Complex* dst, const Complex* src, int nfft0, int nfft1){ const shape_t shape_{ static_cast(nfft0), static_cast(nfft1) }; const shape_t axes_{ 0, 1 }; const stride_t stride_{ static_cast(sizeof(Complex)*nfft1), static_cast(sizeof(Complex)) }; - c2c(shape_, stride_, stride_, axes_, BACKWARD, src, dst, static_cast(1)); + pocketfft::c2c(shape_, stride_, stride_, axes_, pocketfft::BACKWARD, src, dst, static_cast(1)); } }; diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 21d8c5eb5..4f555a6b1 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -88,6 +88,24 @@ else() ei_add_property(EIGEN_MISSING_BACKENDS "pocketfft, ") endif() +if( NOT DUCC_ROOT AND ENV{DUCC_ROOT} ) + set( DUCC_ROOT $ENV{DUCC_ROOT} ) +endif() +find_path(DUCCFFT + NAMES "src/ducc0/fft/fft.h" + PATHS ${DUCC_ROOT}) +message(INFO " ${DUCC_ROOT} ${DUCCFFT}") +if(DUCCFFT) + ei_add_property(EIGEN_TESTED_BACKENDS "duccfft, ") + include_directories( "${DUCCFFT}/src" ) + add_library(ducc_lib "${DUCCFFT}/src/ducc0/infra/threading.cc" ) + target_compile_definitions(ducc_lib PUBLIC "DUCC0_NO_THREADING=1") + ei_add_test(duccfft "-DEIGEN_DUCCFFT_DEFAULT -DDUCC0_NO_THREADING=1" "ducc_lib" ) + set_target_properties(ducc_lib duccfft PROPERTIES CXX_STANDARD 17) +else() + ei_add_property(EIGEN_MISSING_BACKENDS "duccfft, ") +endif() + option(EIGEN_TEST_OPENGL "Enable OpenGL support in unit tests" OFF) if(EIGEN_TEST_OPENGL) find_package(OpenGL) diff --git a/unsupported/test/duccfft.cpp b/unsupported/test/duccfft.cpp new file mode 100644 index 000000000..6f1e9d97f --- /dev/null +++ b/unsupported/test/duccfft.cpp @@ -0,0 +1,3 @@ +#define EIGEN_DUCCFFT_DEFAULT 1 +#include // Needs to be included before main.h. +#include "fft_test_shared.h" diff --git a/unsupported/test/fft_test_shared.h b/unsupported/test/fft_test_shared.h index 0e040ad70..1b9f99c80 100644 --- a/unsupported/test/fft_test_shared.h +++ b/unsupported/test/fft_test_shared.h @@ -240,7 +240,7 @@ EIGEN_DECLARE_TEST(FFTW) { CALL_SUBTEST(test_scalar(2 * 3 * 4 * 5 * 7)); CALL_SUBTEST(test_scalar(2 * 3 * 4 * 5 * 7)); -#if defined EIGEN_HAS_FFTWL || defined EIGEN_POCKETFFT_DEFAULT +#if defined EIGEN_HAS_FFTWL || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_DUCCFFT_DEFAULT CALL_SUBTEST(test_complex(32)); CALL_SUBTEST(test_complex(256)); CALL_SUBTEST(test_complex(3 * 8)); @@ -262,13 +262,13 @@ EIGEN_DECLARE_TEST(FFTW) { // fail to build since Eigen limit the stack allocation size,too big here. // CALL_SUBTEST( ( test_complex2d () ) ); #endif -#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT +#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_DUCCFFT_DEFAULT || defined EIGEN_MKL_DEFAULT CALL_SUBTEST((test_complex2d())); CALL_SUBTEST((test_complex2d())); CALL_SUBTEST((test_complex2d())); CALL_SUBTEST((test_complex2d())); #endif -#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT +#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_DUCCFFT_DEFAULT || defined EIGEN_MKL_DEFAULT CALL_SUBTEST((test_complex2d())); CALL_SUBTEST((test_complex2d())); CALL_SUBTEST((test_complex2d())); -- GitLab