diff --git a/Eigen/src/Core/arch/AVX512/GemmKernel.h b/Eigen/src/Core/arch/AVX512/GemmKernel.h index e06b83c91c4134ff8de104b8b2bbe285ab526b9f..483e14937e87d9eb700b35856963f487b8fd7e21 100644 --- a/Eigen/src/Core/arch/AVX512/GemmKernel.h +++ b/Eigen/src/Core/arch/AVX512/GemmKernel.h @@ -35,6 +35,8 @@ namespace Eigen { namespace internal { +#if EIGEN_USE_AVX512_GEMM_KERNELS + template class gemm_class { using vec = typename packet_traits::type; @@ -947,7 +949,6 @@ EIGEN_DONT_INLINE void gemm_kern_avx512(Index m, Index n, Index k, Scalar *alpha } // Template specializations of GEBP kernels with nr = 8. -#if EIGEN_USE_AVX512_GEMM_KERNELS template class gebp_traits : public gebp_traits { diff --git a/Eigen/src/Core/arch/AVX512/TrsmKernel.h b/Eigen/src/Core/arch/AVX512/TrsmKernel.h index c763b5fe3244d7e8b74556ba4558aa98cbd33173..cb72152472fbd3c1dfcb2a75aba8673b6a121e98 100644 --- a/Eigen/src/Core/arch/AVX512/TrsmKernel.h +++ b/Eigen/src/Core/arch/AVX512/TrsmKernel.h @@ -44,6 +44,8 @@ namespace Eigen { namespace internal { +#if (EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0) + #define EIGEN_AVX_MAX_NUM_ACC (int64_t(24)) #define EIGEN_AVX_MAX_NUM_ROW (int64_t(8)) // Denoted L in code. #define EIGEN_AVX_MAX_K_UNROL (int64_t(4)) @@ -58,7 +60,6 @@ typedef Packet4d vecHalfDouble; // Note: this depends on macros and typedefs above. #include "TrsmUnrolls.inc" -#if (EIGEN_USE_AVX512_TRSM_KERNELS) && (EIGEN_COMP_CLANG != 0) /** * For smaller problem sizes, and certain compilers, using the optimized kernels trsmKernelL/R directly * is faster than the packed versions in TriangularSolverMatrix.h. diff --git a/Eigen/src/Core/products/TriangularSolverMatrix.h b/Eigen/src/Core/products/TriangularSolverMatrix.h index 8244758bd30962f00de7c73179451645d9bf5270..c09fec4f68144d00b0179abd5c411150b07589ba 100644 --- a/Eigen/src/Core/products/TriangularSolverMatrix.h +++ b/Eigen/src/Core/products/TriangularSolverMatrix.h @@ -141,7 +141,8 @@ EIGEN_DONT_INLINE void triangular_solve_matrix::value || std::is_same::value))) { // Very rough cutoffs to determine when to call trsm w/o packing @@ -209,7 +210,7 @@ EIGEN_DONT_INLINE void triangular_solve_matrix::value || std::is_same::value))) { i = IsLower ? k2 + k1 : k2 - k1 - actualPanelWidth; @@ -273,7 +274,8 @@ EIGEN_DONT_INLINE void triangular_solve_matrix& blocking) { Index rows = otherSize; -#if defined(EIGEN_VECTORIZE_AVX512) && EIGEN_USE_AVX512_TRSM_R_KERNELS && EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS +#if defined(EIGEN_VECTORIZE_AVX512) && defined(EIGEN_USE_AVX512_TRSM_R_KERNELS) && EIGEN_USE_AVX512_TRSM_R_KERNELS && \ + EIGEN_ENABLE_AVX512_NOCOPY_TRSM_R_CUTOFFS EIGEN_IF_CONSTEXPR( (OtherInnerStride == 1 && (std::is_same::value || std::is_same::value))) { // TODO: Investigate better heuristics for cutoffs. diff --git a/ci/build.linux.gitlab-ci.yml b/ci/build.linux.gitlab-ci.yml index 9e6303488024e2150d2bd0162fd9102c6edb7d87..04573f2ccf318f2cec730aaef2bd41e5e35e2ace 100644 --- a/ci/build.linux.gitlab-ci.yml +++ b/ci/build.linux.gitlab-ci.yml @@ -97,6 +97,18 @@ build:linux:cross:x86-64:clang-12:avx512dq: variables: EIGEN_CI_ADDITIONAL_ARGS: "-DEIGEN_TEST_AVX512DQ=on" +# Generic vector extension backend. +build:linux:cross:x86-64:clang-19:generic:avx512dq: + image: ubuntu:24.04 + extends: build:linux:cross:x86-64:clang-12:default + variables: + EIGEN_CI_INSTALL: clang-19 + EIGEN_CI_C_COMPILER: clang-19 + EIGEN_CI_CXX_COMPILER: clang++-19 + EIGEN_CI_CROSS_INSTALL: g++-14-x86-64-linux-gnu clang-19 + EIGEN_CI_ADDITIONAL_ARGS: > + -DEIGEN_TEST_CUSTOM_CXX_FLAGS=-mfma;-mavx512dq;-DEIGEN_VECTORIZE_GENERIC=1 + build:linux:docs: extends: .build:linux:cross variables: diff --git a/ci/test.linux.gitlab-ci.yml b/ci/test.linux.gitlab-ci.yml index 17c7ee0e21832357b942c728f3a31e67f554e94e..6712337cc287d46e9a662700f82b279355d30d01 100644 --- a/ci/test.linux.gitlab-ci.yml +++ b/ci/test.linux.gitlab-ci.yml @@ -197,6 +197,29 @@ test:linux:x86-64:clang-12:avx512dq:unsupported: variables: EIGEN_CI_CTEST_LABEL: Unsupported +# Generic vector extension backend. +.test:linux:x86-64:clang-19:generic:avx512dq: + image: ubuntu:24.04 + extends: .test:linux:x86-64 + needs: [ build:linux:cross:x86-64:clang-19:generic:avx512dq ] + variables: + EIGEN_CI_INSTALL: clang-19 + tags: + - eigen-runner + - linux + - x86-64 + - avx512 + +test:linux:x86-64:clang-19:generic:avx512dq:official: + extends: .test:linux:x86-64:clang-19:generic:avx512dq + variables: + EIGEN_CI_CTEST_LABEL: Official + +test:linux:x86-64:clang-19:generic:avx512dq:unsupported: + extends: .test:linux:x86-64:clang-19:generic:avx512dq + variables: + EIGEN_CI_CTEST_LABEL: Unsupported + ##### CUDA ##################################################################### .test:linux:cuda: extends: .test:linux