diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 50e8b10c43f502102ec5209adc09d7cd57e2bc17..bcbc009d381a5fea54006c5b3362763223a4aa97 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,22 +1,20 @@ variables: - MYPROJECT: 'Muscat' - ENV_NAME: "MuscatEnv" - CONDA_BUILD_PACKAGE_DEPS: "cxx-compiler mkl-include cmake cython pytest pytest-cov boost-cpp kokkos=4.4 ninja mkl-devel eigen libboost-headers" - CONDA_DOC_PACKAGE_DEPS: "sphinx<6.0 breathe doxygen furo nbsphinx ipykernel ipywidgets nbconvert trame" - CONDA_RUN_PACKAGE_DEPS: "mmgsuite libscotch dill mkl zlib numpy sympy scipy meshio vtk=*=*osmesa* pyvista networkx>=3.0 eigency=3.4.0.2 cvxpy mmgsuite h5py" - SYSTEM_PACKAGE_DEPS: "libgl1-mesa-glx" + MYPROJECT: "Muscat" + ENV_NAME: "MuscatEnv" + CONDA_BUILD_PACKAGE_DEPS: "cxx-compiler mkl-include cmake cython pytest pytest-cov boost-cpp ninja mkl-devel eigen libboost-headers kokkos=4.7.01 arborx=2.0.1" + CONDA_DOC_PACKAGE_DEPS: "sphinx<6.0 breathe doxygen furo nbsphinx ipykernel ipywidgets nbconvert trame" + CONDA_RUN_PACKAGE_DEPS: "mmgsuite libscotch dill mkl zlib numpy sympy scipy meshio vtk=*=*osmesa* pyvista networkx>=3.0 eigency=3.4.0.2 cvxpy mmgsuite h5py" + SYSTEM_PACKAGE_DEPS: "libgl1-mesa-glx" stages: - - test - - deploydoc - + - test + - deploydoc .job_rules: rules: - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH - when: manual - # default linux environment parameters .linux_env: image: "condaforge/mambaforge:latest" @@ -42,7 +40,7 @@ stages: -D CMAKE_INSTALL_PREFIX=${PREFIX} -G Ninja -B ${PWD}/cmakeBuild - -S .. + -S ../superbuild/ - > cmake --build cmakeBuild$PMVERSION @@ -58,11 +56,10 @@ stages: # --parallel 16 # --target doc_html - test_and_doc_py: extends: - - ".linux_env" - - ".build_script" + - .linux_env + - .build_script - .job_rules stage: test parallel: diff --git a/CMakeLists.txt b/CMakeLists.txt index 68e26fa3a0857959fb17fc3ed262d5d86e1a2551..e65ea8fb006dfb4965bb416f1dc39027ecabbf76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,7 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) ) endif() -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -36,7 +36,6 @@ list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) # List of options option(Muscat_ENABLE_Python "Build the Python frontend" ON) -option(Muscat_ENABLE_CUDA "Build the Kokkos CUDA support" OFF) option(Muscat_ENABLE_Mumps "Add the compilation to the mumps solver interface" OFF) option(Muscat_ENABLE_Documentation "Add the target doc_html to build the documentation" ON) diff --git a/ChangeLog.txt b/ChangeLog.txt index 5787f865bf870f05cadb8301c1db0a83d99bbb5a..90885930516797f24b89834c30d7d8889727469b 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -13,20 +13,30 @@ New in version 2.5.1: ********************* General Warning: +Kindly note that the default integrator is switched to the legacy Cpp. To use of Kokkos integrator: +- import Muscat.FE.Integration as BTFEI +- BTFEI.UseKokkos = True + The new kokkos integration routine is more powerful but also more strict. -All fields in the weak formulation must have a coordinate dependency coherent with the mesh. this means: +All fields in the weak formulation must have a coordinate dependency coherent with the mesh. This means: - if the mesh is 2D (mesh.GetPointsDimensionality() -> 2) then you must have fields only dependent of x,y ("u(x,y)" for example) -- if the mesh is 3D (mesh.GetPointsDimensionality() -> 3) then you must have fields dependents of x,y,z ("u(x,y,z)" for example) +- if the mesh is 3D (mesh.GetPointsDimensionality() -> 3) then you must have fields dependent of x,y,z ("u(x,y,z)" for example) So if you encounter a segmentation fault or a non valid matrix or rhs please double check your weak form (you can set UseKokkos to False to verify your integration) +Kindly note that the new GetFieldTransferOpKokkos will fall back to default GetFieldTransferOpCpp if : +- Source mesh space dimension < target mesh space dimension_tp +- Elementfilter is defined +- DifferentialOperator is defined +It is not a strict limitation, and the lastest limitation will be implemented in a future release Api Change: - (SymWeakForm.py) The parameter sdim is now required in functions GetField, GetTestFields New functionalities: +- (MeshFieldOperations.py) Add new GetFieldTransferOpKokkos that returns the field transfer operator with portable CPU/GPU support based on mesh connectivity (not on dof DofNumbering) - (PickleTools.py) Add support to read pickles files from Muscat 2.4 and earlier. - (FilterOperators.py) ensure the operator always return a vector of MuscatIndex - (AnisotropicMetricComputation.py) Add a new function GeometricMeanOfMatrices to compute the weighted mean of metrics @@ -60,6 +70,7 @@ Corrections: Improvement: +- (CMake project) Fetch and install Kokkos & ArborX if not found with GPU arch auto-detection - add @property node to CMesh to be capable to extract the nodes - (Documentation) A new example of strain computation has been added in the documentation - Move PartitionedMesh.py to Muscat.MeshContainers.PartitionedMesh diff --git a/README.md b/README.md index e24e063a2b47e82586de735983edc95b589a7f7a..5243b4eb6aad801304c3db9b363f606496ac7991 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ All questions can be addressed using the Issues system of Gitlab https://gitlab. Dependencies ============ - python minimal version: 3.9 + python minimal version: 3.10 PYTHON OPEN-SOURCE DEPENDENCIES @@ -94,6 +94,10 @@ Dependencies * Eigen (http://eigen.tuxfamily.org) (the pypi eigency package has the Eigen library already inside the package) ( a conda-forge package is available for eigen) + * Kokkos (https://github.com/kokkos/kokkos) + ( fetched in this project via CMake) + * ArborX (https://github.com/arborx/ArborX) + ( fetched in this project via CMake) THIRD-PARTY PROPRIETARY DEPENDENCIES (optional): diff --git a/cmake/Requirements.cmake b/cmake/Requirements.cmake index b765c64af66dd26291055d6ad4ef6db87538421a..4eee0f6071c675ee50e799e05bc11cba51b1f7d4 100644 --- a/cmake/Requirements.cmake +++ b/cmake/Requirements.cmake @@ -11,13 +11,25 @@ find_package(Boost REQUIRED) find_package(Eigen3 3.4 REQUIRED CONFIG) # Requirements for the Kokkos native library -if (Muscat_ENABLE_CUDA) +find_package(OpenMP MODULE COMPONENTS CXX) +find_package(Kokkos 4.7 CONFIG) + +if (Kokkos_ENABLE_CUDA) find_package(CUDAToolkit REQUIRED) endif() -find_package(OpenMP MODULE COMPONENTS CXX) -find_package(Kokkos REQUIRED CONFIG) -if(Kokkos_FOUND) - add_compile_definitions(WITH_KOKKOS) + +# Requirements for the ArborX library +find_package(ArborX 2.0 CONFIG) +if(ArborX_FOUND) + message(STATUS "Found ArborX: ${ArborX_DIR} (version \"${ArborX_VERSION}\")") +else() + include(FetchContent) + FetchContent_Declare( + ArborX + GIT_REPOSITORY https://github.com/arborx/ArborX.git + GIT_TAG v2.0.1 + ) + FetchContent_MakeAvailable(ArborX) endif() # Requirements for the Python library diff --git a/cpp_generators/CMakeLists.txt b/cpp_generators/CMakeLists.txt index 8c074031fa8699dadb9d6cac7d2e512aaf9fc3ef..bd41efffc3a37d96130b49f5b76a6fdfc3c698d7 100644 --- a/cpp_generators/CMakeLists.txt +++ b/cpp_generators/CMakeLists.txt @@ -5,6 +5,7 @@ set(CPP_SRC_GENERATORS SpaceGenerator.py GeoSpaceGenerator.py EnumGenerator.py + EvaluatorsGenerator.py ) # Cpp Generated (init to empty list) ############################################################ diff --git a/cpp_generators/EvaluatorsGenerator.py b/cpp_generators/EvaluatorsGenerator.py new file mode 100644 index 0000000000000000000000000000000000000000..7eaedca52762196ecab2feab272cba4fa838680a --- /dev/null +++ b/cpp_generators/EvaluatorsGenerator.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +# +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# + +from typing import Tuple, List +from Muscat.MeshContainers.ElementsDescription import ElementsInfo, ElementDescription, ElementType, GeoSupport + + +def GenerateFunctionEvaluator(path: str): + import Muscat.FE.Spaces.FESpaces as FES + + with open(path, "w", encoding="utf8") as cppFile: + def GenerateFESpaceFunction(FEspace: FES.FESpace): + cppFile.write(f""" static KOKKOS_INLINE_FUNCTION void evaluateShapeFunctionIn{FEspace.name}(const ElementType& type, const MatrixD31& baryCoords, Eigen::Matrix& values){{\n + switch(type) {{\n""") + for elementType in ElementType: + if elementType == ElementType.Element_NA: + continue + cppFile.write(f" case ElementType::{elementType.name}:\n") + structName = f"{elementType.name}_{FEspace.name}" + cppFile.write( + f" {structName}::UpdateShapeFunctionsValues(baryCoords[0], baryCoords[1], baryCoords[2], values.block<1, {structName}::numberOfShapeFunctions>(0,0));\n") + cppFile.write(" break;\n") + cppFile.write(" }\n}\n") + + def GenerateFEDerSpaceFunction(FEspace: FES.FESpace): + cppFile.write(f""" static KOKKOS_INLINE_FUNCTION void evaluateShapeFunctionDerIn{FEspace.name}(const ElementType& type, const MatrixD31& baryCoords, Eigen::Matrix& values){{\n + switch(type) {{\n""") + for elementType in ElementType: + if elementType == ElementType.Element_NA: + continue + cppFile.write( + f" case ElementType::{elementType.name}: {{\n") + structName = f"{elementType.name}_{FEspace.name}" + cppFile.write( + f" Eigen::Matrix res;\n") + cppFile.write( + f" {structName}::UpdateShapeFunctionsDerValues(baryCoords[0], baryCoords[1], baryCoords[2], res);\n") + cppFile.write( + f" values.block<{structName}::dimensionality, {structName}::numberOfShapeFunctions>(0,0) = res;\n") + cppFile.write(" break;}\n") + cppFile.write(" }\n}\n") + + cppFile.write(""" +# pragma once +# include "MeshTools/TransferBackEnds/Kokkos/Utils/FieldTransferTools.h" +namespace Muscat::KK::FieldTransfer{\n +""") + for FEspace in FES.GetListOfAvailableFESpaces(): + GenerateFESpaceFunction(FEspace) + GenerateFEDerSpaceFunction(FEspace) + + cppFile.write(""" +KOKKOS_FUNCTION void evaluateShapeFunctionInSpace(const FESpace& space, const ElementType& type, const MatrixD31& baryCoords, Eigen::Matrix& values) +{ + switch(space) {\n""") + + for FEspace in FES.GetListOfAvailableFESpaces(): + FESpaceName = FEspace.name + cppFile.write(" case FESpace::"+FESpaceName+":\n") + cppFile.write( + f" evaluateShapeFunctionIn{FESpaceName}(type, baryCoords, values);\n") + cppFile.write(" break;\n") + cppFile.write("""\n } +}\n""") + + cppFile.write(""" +KOKKOS_FUNCTION void evaluateShapeFunctionDerInSpace(const FESpace& space, const ElementType& type, const MatrixD31& baryCoords, Eigen::Matrix& values) +{ + switch(space) {\n""") + + for FEspace in FES.GetListOfAvailableFESpaces(): + FESpaceName = FEspace.name + cppFile.write(" case FESpace::"+FESpaceName+":\n") + cppFile.write( + f" evaluateShapeFunctionDerIn{FESpaceName}(type, baryCoords, values);\n") + cppFile.write(" break;\n") + cppFile.write("""\n } +}\n""") + + cppFile.write(""" +}; // namespace Muscat::KK::FieldTransfer\n""") + + +def GenerateJacobianEvaluator(path): + + import Muscat.FE.Spaces.FESpaces as FES + + with open(path, "w", encoding="utf8") as cppFile: + # def GenerateFESpaceFunction(FEspace: FES.FESpace): + cppFile.write(""" +# pragma once + +# include "MeshContainers/ElementsDescription.h" +# include "MeshContainers/GeneratedGeoSpace.h" +# include "LinAlg/Kokkos/BasicOperationsKokkos.h" +# include "LinAlg/EigenTypes.h" +# include +# include "MeshTools/TransferBackEnds/Kokkos/Utils/Consts.hxx" +namespace Muscat::KK::FieldTransfer{ +""") + + cppFile.write(""" +template +KOKKOS_INLINE_FUNCTION void InverseJacobianDeterminantFor(const ElementType& type, Eigen::Matrix& valdphidxi, Eigen::Matrix Xcoor, Eigen::Matrix& jac, double& jDet, Eigen::Matrix& jinv) { + switch (type) { +""") + + for elemDim in range(4): + for elemType in ElementType: + if elemType == ElementType.Element_NA or ElementsInfo[elemType].dimension != elemDim: + continue + cppFile.write( + f" case ElementType::{elemType.name}:\n") + cppFile.write(f""" if constexpr (spaceDimension >= {elemDim}){{ + Eigen::Matrix jacRes; + Eigen::Matrix jInvRes; + ElementHelper::SpaceDimElementDim::InverseJacobianDeterminant(valdphidxi, Xcoor, jacRes, jDet, jInvRes); + jac.template block<{elemDim}, spaceDimension>(0, 0) = jacRes; + jinv.template block(0, 0) = jInvRes; + break; + }} else Kokkos::abort("Expected dimension >= {elemDim}"); +""") + + cppFile.write(""" + default: + char errorMsg[] = "Unexpected element type XXX"; + errorMsg[sizeof(errorMsg) - 4] = '0' + ((int) type / 100); + errorMsg[sizeof(errorMsg) - 3] = '0' + (((int) type / 10) % 10); + errorMsg[sizeof(errorMsg) - 2] = '0' + ((int) type % 10); + Kokkos::abort(errorMsg); + break; + }; +};""") + cppFile.write(""" +}; // namespace Muscat: : KK: : FieldTransfer\n""") + +# Template of a generator function. Take a look at Tools.py:generate() for further explanation + + +def Generate(path: str, id: int): + if id == 0: + GenerateFunctionEvaluator(path) + return + if id == 1: + GenerateJacobianEvaluator(path) + return + raise + +# Usage: FUnctionEvaluatorGenerator.py -> Print the list of generated files +# Usage: FUnctionEvaluatorGenerator.py -> Generate file number at + + +if __name__ == '__main__': # pragma no cover + from BaseGenerator import generate + generate([("MeshTools/TransferBackEnds/Kokkos", "GeneratedShapeFunctionEvaluator.h"), + ("MeshTools/TransferBackEnds/Kokkos", "GeneratedJacobianEvaluator.hxx")], Generate) diff --git a/cpp_src/CMakeLists.txt b/cpp_src/CMakeLists.txt index 985e1c31e66a53672c4910c409de5591ede55ee2..6771d0e2f1a4a77be8db962db1b6e1589a3e81bf 100644 --- a/cpp_src/CMakeLists.txt +++ b/cpp_src/CMakeLists.txt @@ -47,11 +47,38 @@ set(CPP_HDR set(CPP_KK_SRC Helpers/Kokkos/KokkosHelper.cpp MeshTools/TransferBackEnds/FieldTransfer.cpp + ) set(CPP_KK_HDR Helpers/Kokkos/KokkosHelper.h MeshTools/TransferBackEnds/FieldTransfer.h + MeshTools/TransferBackEnds/Kokkos/Utils/FieldTransferTools.h + MeshTools/TransferBackEnds/Kokkos/ComputeSkin/ComputeLinearSkin.hxx + MeshTools/TransferBackEnds/Kokkos/Transfers/FTEntry.hxx + MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterClamp.hxx + MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterExtrap.hxx + MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterNearest.hxx + MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterZero.hxx + MeshTools/TransferBackEnds/Kokkos/Transfers/FTNearest.hxx + MeshTools/TransferBackEnds/Kokkos/Transfers/FTUtils.hxx + MeshTools/TransferBackEnds/Kokkos/Transfers/SkinProjection.hxx + MeshTools/TransferBackEnds/Kokkos/Utils/Consts.hxx + MeshTools/TransferBackEnds/Kokkos/Utils/Params.hxx + MeshTools/TransferBackEnds/Kokkos/Utils/SparsedMatCOO.hxx + MeshTools/TransferBackEnds/Kokkos/Utils/Transfer.hxx + MeshTools/TransferBackEnds/Kokkos/FieldTransferKokkos.hxx + MeshTools/TransferBackEnds/Kokkos/NewtonKokkos.hxx + Containers/Kokkos/MeshKokkos.h + FE/Kokkos/SpaceAtIP.h + FE/Kokkos/NativeIntegrationKokkos.h + FE/Kokkos/NativeIntegrationKokkos.hpp + Containers/Kokkos/MeshKokkos.h + FE/Kokkos/SpaceAtIP.h + FE/Kokkos/NativeIntegrationKokkos.h + FE/Kokkos/NativeIntegrationKokkos.hpp + LinAlg/Kokkos/Utils.h + LinAlg/Kokkos/Utils.hpp Containers/Kokkos/MeshKokkos.h FE/Kokkos/SpaceAtIP.h FE/Kokkos/NativeIntegrationKokkos.h @@ -95,14 +122,14 @@ target_link_libraries(${NativeLib} PUBLIC Eigen3::Eigen) target_link_libraries(${NativeLib} PUBLIC OpenMP::OpenMP_CXX) target_link_libraries(${NativeLib} PUBLIC Mmg::libmmg3d_so Mmg::libmmg2d_so Mmg::libmmgs_so) set_target_properties(${NativeLib} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) -if(Muscat_ENABLE_CUDA) +if(Kokkos_ENABLE_CUDA) target_compile_options(${NativeLib} PRIVATE) endif() set(NativeLib ${NativeLib} PARENT_SCOPE) #Export it outside of the file target_sources(${NativeLibKokkos} PUBLIC FILE_SET HEADERS BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR} FILES ${CPP_KK_HDR}) target_link_libraries(${NativeLibKokkos} PRIVATE ${NativeLib}) -target_link_libraries(${NativeLibKokkos} PUBLIC Eigen3::Eigen Kokkos::kokkos OpenMP::OpenMP_CXX) +target_link_libraries(${NativeLibKokkos} PUBLIC Eigen3::Eigen Kokkos::kokkos ArborX::ArborX) set(NativeLibKokkos ${NativeLibKokkos} PARENT_SCOPE) #Export it outside of the file add_subdirectory(Tests) diff --git a/cpp_src/FE/Kokkos/NativeIntegrationKernelKokkos.hpp b/cpp_src/FE/Kokkos/NativeIntegrationKernelKokkos.hpp index 32e9d2e3f9488c3eab9f2c4a619d785607f3de6f..a60eae45004a0193cc38776c487312a2f7f6fb7e 100644 --- a/cpp_src/FE/Kokkos/NativeIntegrationKernelKokkos.hpp +++ b/cpp_src/FE/Kokkos/NativeIntegrationKernelKokkos.hpp @@ -381,8 +381,7 @@ void DoIntegrationOnDimElementKernel(IntegralKokkos& self, } } } - } - }); + } }); Kokkos::Profiling::popRegion(); }; diff --git a/cpp_src/FE/Kokkos/Numbering.h b/cpp_src/FE/Kokkos/Numbering.h index bc9802b54d964c89a773ccb93a0631cbcb0f029a..c74c052b3dd92d2226a7b23488e6e5e95c3e09a2 100644 --- a/cpp_src/FE/Kokkos/Numbering.h +++ b/cpp_src/FE/Kokkos/Numbering.h @@ -22,13 +22,13 @@ template struct KokkosNumberings { // ElementType, numbering ID, element, dof - //using OuterViewType = std::vector; - using OuterViewType = Kokkos2DViewsListStorage; + // using OuterViewType = std::vector; + using OuterViewType = Kokkos2DViewsListStorage; std::map storage; std::vector sizes; - std::map > localsizes; - std::map numberOfElements; + std::map > localsizes; + std::map numberOfElements; CMuscatIndex GetSize(CMuscatIndex i) { return this->sizes[i]; } void AllocateElementTypesAndNumberings(MeshKokkos meshKokkos, int i) { for (const auto& [elementType, container] : meshKokkos.elements) { @@ -36,11 +36,10 @@ struct KokkosNumberings { numberOfElements[elementType] = container.GetNumberOfElements(); } this->sizes.resize(i); - } void SetNumberingFP(const int& i, const Muscat::DofNumbering& feNumbering) { for (const auto& [elementType, numbering] : feNumbering) { - if(localsizes.find(elementType) != localsizes.end()){ + if (localsizes.find(elementType) != localsizes.end()) { localsizes[elementType].push_back(localsizes[elementType].back() + numbering->cols()); } } @@ -48,9 +47,9 @@ struct KokkosNumberings { void SetFromHost(const int& i, const Muscat::DofNumbering& feNumbering) { this->sizes[i] = feNumbering.GetSize(); - if (i == 0){ + if (i == 0) { for (const auto& [elementType, numbering] : feNumbering) { - if(localsizes.find(elementType) != localsizes.end()){ + if (localsizes.find(elementType) != localsizes.end()) { storage[elementType].SetCapacity(this->sizes.size(), numberOfElements[elementType], this->localsizes[elementType]); } } diff --git a/cpp_src/FE/Kokkos/SpaceAtIP.h b/cpp_src/FE/Kokkos/SpaceAtIP.h index dd4dd1e42b1447e09c1dfcb0b19117d76ffd3e66..fa725c2fde34b96df6ae9c048514380a2c36bfe1 100644 --- a/cpp_src/FE/Kokkos/SpaceAtIP.h +++ b/cpp_src/FE/Kokkos/SpaceAtIP.h @@ -23,7 +23,7 @@ struct ElementSpaceAtIPKokkos { int numberOfIntegrationPoints{0}; Kokkos::View valN{0, 0}; // IntegrationPoint, NumberOfShapeFunctions Kokkos::View valdphidxi{0, 0, 0}; // IntegrationPoint, elementDim, NumberOfShapeFunctions - ElementSpaceAtIPKokkos(): dimensionality(-1),numberOfShapeFunctions(0),numberOfIntegrationPoints(0), valN("valN_h",0, 0), valdphidxi("valdphidxi_h",0, 0, 0) {} + ElementSpaceAtIPKokkos() : dimensionality(-1), numberOfShapeFunctions(0), numberOfIntegrationPoints(0), valN("valN_h", 0, 0), valdphidxi("valdphidxi_h", 0, 0, 0) {} void SetFromHost(ElementType elementType, const Muscat::ElementSpace& ES, const Muscat::ElementaryQuadrature& EIR); void SetFromHost(ElementType elementType, const Muscat::ElementSpaceAtIP& ESIP); }; @@ -34,18 +34,18 @@ class OuterElementSpaceAtIPKokkosStorage; // host version with no copy template <> struct OuterElementSpaceAtIPKokkosStorage : public std::vector > { - using ReturnStruct = ElementSpaceAtIPKokkos ; - void SetCapacity(int i, int numberOfIntegrationPoints, int dimensionality){ - this->resize(i,ElementSpaceAtIPKokkos()); + using ReturnStruct = ElementSpaceAtIPKokkos; + void SetCapacity(int i, int numberOfIntegrationPoints, int dimensionality) { + this->resize(i, ElementSpaceAtIPKokkos()); } - }; +}; // cuda version with contiguous memory #ifdef KOKKOS_ENABLE_CUDA template <> struct OuterElementSpaceAtIPKokkosStorage { - using MemSpace = typename DeviceExecutionSpace::memory_space; + using MemorySpace = typename DeviceExecutionSpace::memory_space; Kokkos::View valN; // IntegrationPoint, NumberOfShapeFunctions Kokkos::View valdphidxi; @@ -65,9 +65,9 @@ struct OuterElementSpaceAtIPKokkosStorage { 27); // need to change 27 to numberOfShapeFunctions Kokkos::View valdphidxi_h("valdphidxi_h", - numberOfIntegrationPoints, - dimensionality, - 27); + numberOfIntegrationPoints, + dimensionality, + 27); ElementSpaceAtIP saip = EvaluateSpaceAt(es, EIR); @@ -79,8 +79,8 @@ struct OuterElementSpaceAtIPKokkosStorage { } } } - Kokkos::deep_copy(valN, Kokkos::create_mirror_view_and_copy(MemSpace(), valN_h)); - Kokkos::deep_copy(valdphidxi, Kokkos::create_mirror_view_and_copy(MemSpace(), valdphidxi_h)); + Kokkos::deep_copy(valN, Kokkos::create_mirror_view_and_copy(MemorySpace(), valN_h)); + Kokkos::deep_copy(valdphidxi, Kokkos::create_mirror_view_and_copy(MemorySpace(), valdphidxi_h)); }; void SetFromHost(ElementType elementType, const Muscat::ElementSpaceAtIP& ESIP) { // need to code thid function @@ -106,8 +106,8 @@ struct OuterElementSpaceAtIPKokkosStorage { } } } - Kokkos::deep_copy(valN, Kokkos::create_mirror_view_and_copy(MemSpace(), valN_h)); - Kokkos::deep_copy(valdphidxi, Kokkos::create_mirror_view_and_copy(MemSpace(), valdphidxi_h)); + Kokkos::deep_copy(valN, Kokkos::create_mirror_view_and_copy(MemorySpace(), valN_h)); + Kokkos::deep_copy(valdphidxi, Kokkos::create_mirror_view_and_copy(MemorySpace(), valdphidxi_h)); }; }; KOKKOS_INLINE_FUNCTION auto operator[](const long i) const { @@ -117,8 +117,8 @@ struct OuterElementSpaceAtIPKokkosStorage { }; void SetCapacity(int i, int numberOfIntegrationPoints, int dimensionality) { - this->valN = Kokkos::View(Kokkos::view_alloc(MemSpace(), Kokkos::WithoutInitializing, " valN "), i, numberOfIntegrationPoints, 27); - this->valdphidxi = Kokkos::View(Kokkos::view_alloc(MemSpace(), Kokkos::WithoutInitializing, " valdphidxi "), i, numberOfIntegrationPoints, dimensionality, 27); + this->valN = Kokkos::View(Kokkos::view_alloc(MemorySpace(), Kokkos::WithoutInitializing, " valN "), i, numberOfIntegrationPoints, 27); + this->valdphidxi = Kokkos::View(Kokkos::view_alloc(MemorySpace(), Kokkos::WithoutInitializing, " valdphidxi "), i, numberOfIntegrationPoints, dimensionality, 27); } KOKKOS_INLINE_FUNCTION auto size() { return this->valN.extent(0); @@ -153,9 +153,9 @@ struct KokkosSpaces { } } - void SetFromHost(const int& i, const Muscat::Space& FeSpace, const MeshQuadrature& sir,const MeshKokkos& meshKokkos ) { + void SetFromHost(const int& i, const Muscat::Space& FeSpace, const MeshQuadrature& sir, const MeshKokkos& meshKokkos) { for (const auto& [elementType, monoElements] : meshKokkos.elements) { - const auto& elementSpace = FeSpace.storage.at(elementType); + const auto& elementSpace = FeSpace.storage.at(elementType); if (this->storage[elementType].size() != this->numberOfSpaces) { storage[elementType].SetCapacity(this->numberOfSpaces, sir.GetIR(elementType).GetNumberOfPoints(), Muscat::elementsInfo[elementType].dimensionality()); } diff --git a/cpp_src/FE/Kokkos/Storage/Csr.hxx b/cpp_src/FE/Kokkos/Storage/Csr.hxx index a4384ae2188ddb75c0afc81f76bef715e8912089..6204b20c38bbb2c9d13046df459dbec43fd06ee8 100644 --- a/cpp_src/FE/Kokkos/Storage/Csr.hxx +++ b/cpp_src/FE/Kokkos/Storage/Csr.hxx @@ -19,11 +19,11 @@ template * Example: to store (1,2,3), (4,5) in memory, values would contain (1,2,3,4,5) and indices (0,3,5) */ class CSRList { - using MemSpace = typename ExecSpace::memory_space; + using MemorySpace = typename ExecSpace::memory_space; public: - Kokkos::DualView values; - Kokkos::DualView indices; + Kokkos::DualView values; + Kokkos::DualView indices; CSRList() = default; @@ -36,7 +36,7 @@ class CSRList { * @param[in] pos the position of the entry. * @return A subview of the value view holding the values of the vector at position "pos". */ - KOKKOS_INLINE_FUNCTION Kokkos::View entryAt_device(int pos) const { + KOKKOS_INLINE_FUNCTION Kokkos::View entryAt_device(int pos) const { return Kokkos::subview(values.view_device(), Kokkos::make_pair(indices.view_device()(pos), indices.view_device()(pos + 1))); } @@ -54,7 +54,7 @@ class CSRList { return indices.extent(0) - 1; } - CSRList(Kokkos::View fromIndices, Kokkos::View fromValues) : indices(fromIndices), values(fromValues) {}; + CSRList(Kokkos::View fromIndices, Kokkos::View fromValues) : indices(fromIndices), values(fromValues) {}; void Sync() { indices.modify_host(); diff --git a/cpp_src/FE/Kokkos/Storage/Storage.h b/cpp_src/FE/Kokkos/Storage/Storage.h index ca244f1a1fd323ca6a8ed505fe8cef032d493ab2..06e913328bca5d5981ff9a6b2b76b6bf9c181a48 100644 --- a/cpp_src/FE/Kokkos/Storage/Storage.h +++ b/cpp_src/FE/Kokkos/Storage/Storage.h @@ -54,11 +54,11 @@ struct Kokkos2DViewsListStorage : public std::vec template class Kokkos2DViewsListStorage { - using MemSpace = typename DeviceExecutionSpace::memory_space; + using MemorySpace = typename DeviceExecutionSpace::memory_space; public: - Kokkos::View values; - Kokkos::View indices; + Kokkos::View values; + Kokkos::View indices; std::vector numberOfColumns; using InnerViewType = Kokkos::View; @@ -71,8 +71,8 @@ class Kokkos2DViewsListStorage { void SetCapacity(int i, int numberOfRows, const std::vector& numberOfColumns) { // keep a copy of the numberOfColumns this->numberOfColumns = numberOfColumns; - this->indices = SendToDevice(this->numberOfColumns.size(), this->numberOfColumns.data()); // Kokkos::View(Kokkos::view_alloc(MemSpace(), " indices "), i+1); - this->values = Kokkos::View(Kokkos::view_alloc(MemSpace(), Kokkos::WithoutInitializing, " values "), numberOfRows, numberOfColumns.back()); + this->indices = SendToDevice(this->numberOfColumns.size(), this->numberOfColumns.data()); // Kokkos::View(Kokkos::view_alloc(MemorySpace(), " indices "), i+1); + this->values = Kokkos::View(Kokkos::view_alloc(MemorySpace(), Kokkos::WithoutInitializing, " values "), numberOfRows, numberOfColumns.back()); } /** diff --git a/cpp_src/Helpers/Kokkos/KokkosHelper.h b/cpp_src/Helpers/Kokkos/KokkosHelper.h index 316dd96cedef47f43d2ef561f570e2a492af8aa7..2085f730287fa16d7ccef9004f94802c845a890a 100644 --- a/cpp_src/Helpers/Kokkos/KokkosHelper.h +++ b/cpp_src/Helpers/Kokkos/KokkosHelper.h @@ -7,10 +7,7 @@ #include #include - -#include -#include -#include +#include #include namespace Muscat::KK { @@ -31,9 +28,9 @@ template Kokkos::View SendToDevice(const MapMatrixIDD& data) { Kokkos::View > data_host( data.data(), - std::size_t(data.rows()), - std::size_t(data.cols())); - return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space{}, data_host); + size_t(data.rows()), + size_t(data.cols())); + return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space(), data_host); }; template @@ -45,8 +42,8 @@ template Kokkos::View SendToDevice(const MapMatrixDDD& data) { Kokkos::View data_host( data.data(), - std::size_t(data.rows()), - std::size_t(data.cols())); + size_t(data.rows()), + size_t(data.cols())); return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space(), data_host); }; @@ -59,8 +56,8 @@ template Kokkos::View SendToDevice(std::shared_ptr& data) { Kokkos::View data_host( data->data(), - std::size_t(data->rows()), - std::size_t(data->cols())); + size_t(data->rows()), + size_t(data->cols())); return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space(), data_host); }; @@ -68,7 +65,7 @@ template Kokkos::View SendToDevice1D(const MatrixID1& data) { Kokkos::View > data_h( data.data(), - std::size_t(data.rows())); + size_t(data.rows())); return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space(), data_h); }; @@ -76,23 +73,23 @@ template Kokkos::View SendToDevice1D(const Eigen::DenseBase& data) { if (data.rows() > 1 && data.cols() > 1) { std::cout << "can treat only 1D matrices" << std::endl; - std::exit(1); + exit(1); } if (data.rows() == 0 || data.cols() == 0) { - return Kokkos::View{}; + return Kokkos::View(); } if (data.rows() > 1) { Kokkos::View > data_h( &data(0, 0), - std::size_t(data.rows())); - return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space{}, data_h); + size_t(data.rows())); + return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space(), data_h); } else { Kokkos::View > data_h( &data(0, 0), - std::size_t(data.cols())); - return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space{}, data_h); + size_t(data.cols())); + return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space(), data_h); } assert(0); }; @@ -105,19 +102,19 @@ Kokkos::View SendToDevice1DI(const Eigen::DenseB } if (data.rows() == 0 || data.cols() == 0) { - return Kokkos::View{}; + return Kokkos::View(); } if (data.rows() > 1) { Kokkos::View > data_h( &data(0, 0), - std::size_t(data.rows())); - return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space{}, data_h); + size_t(data.rows())); + return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space(), data_h); } else { Kokkos::View > data_h( &data(0, 0), - std::size_t(data.cols())); - return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space{}, data_h); + size_t(data.cols())); + return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space(), data_h); } assert(0); }; @@ -126,16 +123,16 @@ template Kokkos::View SendToDevice(const CMuscatIndex size, const CMuscatIndex* data) { Kokkos::View data_h( data, - std::size_t(size)); - return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space{}, data_h); + size_t(size)); + return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space(), data_h); } - +// template Kokkos::View SendToDevice(const CMuscatIndex size, const CMuscatFloat* data) { Kokkos::View data_h( data, - std::size_t(size)); - return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space{}, data_h); + size_t(size)); + return Kokkos::create_mirror_view_and_copy(typename ExecSpace::memory_space(), data_h); } -} // namespace Muscat::KK +}; // namespace Muscat::KK diff --git a/cpp_src/LinAlg/BasicOperations.h b/cpp_src/LinAlg/BasicOperations.h index 090c05291e9563689bff279679afe479ba6882e1..4dac71b45282a617d82871c99ada80f0d7f6ae99 100644 --- a/cpp_src/LinAlg/BasicOperations.h +++ b/cpp_src/LinAlg/BasicOperations.h @@ -20,9 +20,9 @@ class SpaceDimElementDim { public: /** Compute and return the Jacobian, det(jacobian) and the inverse of the Jacobian * Warning, the jacobian (and it inverse) are not necessarily square matrices - */ + */ template - static void InverseJacobianDeterminant(const AnyEigenArrayA& /*valdphidxi*/, const AnyEigenArrayB& /*xcoor*/, JT& Jacobian, double& Jdet, IJT& Jinv) { + static void InverseJacobianDeterminant(const AnyEigenArrayA& /*valdphidxi*/, const AnyEigenArrayB& /*xcoor*/, JT& Jacobian, double& Jdet, IJT& Jinv) { Jdet = 1.; Jacobian(0, 0) = 1.; Jinv(0, 0) = 1.; @@ -31,23 +31,23 @@ class SpaceDimElementDim { template static void Normal(JT& Jacobian, MatrixD31& Normal) {} static constexpr bool CanProvideNormal() { return false; } /**< return true if this type of element has a well defined normal */ - static constexpr unsigned int size = 1; /**< number of lines in the jacobian matrix (cols of the Jinv) */ - static constexpr unsigned int spaceSize = 1; /**< Dimensionality of the ambient space */ + static constexpr unsigned int size = 1; /**< number of lines in the jacobian matrix (cols of the Jinv) */ + static constexpr unsigned int spaceSize = 1; /**< Dimensionality of the ambient space */ /** Function to resize the containers to store the results */ template static void ResizeContainers(JT& Jacobian, IJT& Jinv) { Jacobian.resize(1, 1); Jinv.resize(1, 1); } - static MatrixD11 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD11 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD11 res; - res << b(0,0)/A(0,0); - return res ; + res << b(0, 0) / A(0, 0); + return res; } - static MatrixD11 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD11 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD11 res; - res << b(0,0)/A(0,0); - return res ; + res << b(0, 0) / A(0, 0); + return res; } }; //-------------- 1D elements --------------- @@ -56,7 +56,7 @@ class SpaceDimElementDim<1, 1> { public: /** Compute and return the Jacobian, det(jacobian) and the inverse of the Jacobian * Warning, the jacobian (and it inverse) are not necessarily square matrices - */ + */ template static void InverseJacobianDeterminant(const AnyEigenArrayA& valdphidxi, const AnyEigenArrayB& xcoor, JT& Jacobian, double& Jdet, IJT& Jinv) { Jacobian = valdphidxi.template topRows<1>() * xcoor; @@ -68,23 +68,23 @@ class SpaceDimElementDim<1, 1> { template static void Normal(JT& Jacobian, MatrixD31& Normal) {} static constexpr bool CanProvideNormal() { return false; } /**< return true if this type of element has a well defined normal */ - static constexpr unsigned int size = 1U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ - static constexpr unsigned int spaceSize = 1; /**< Dimensionality of the ambient space */ + static constexpr unsigned int size = 1U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ + static constexpr unsigned int spaceSize = 1; /**< Dimensionality of the ambient space */ /** Function to resize the containers to store the results */ template static void ResizeContainers(JT& Jacobian, IJT& Jinv) { Jacobian.resize(1, 1); Jinv.resize(1, 1); } - static MatrixD11 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD11 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD11 res; - res << b(0,0)/A(0,0); - return res ; + res << b(0, 0) / A(0, 0); + return res; } - static MatrixD11 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD11 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD11 res; - res << b(0,0)/A(0,0); - return res ; + res << b(0, 0) / A(0, 0); + return res; } }; @@ -93,7 +93,7 @@ class SpaceDimElementDim<2, 1> { public: /** Compute and return the Jacobian, det(jacobian) and the inverse of the Jacobian * Warning, the jacobian (and it inverse) are not necessarily square matrices - */ + */ template static void InverseJacobianDeterminant(const AnyEigenArrayA& valdphidxi, const AnyEigenArrayB& xcoor, JT& Jacobian, double& Jdet, IJT& Jinv) { // in a 2D space @@ -124,24 +124,24 @@ class SpaceDimElementDim<2, 1> { Normal(2, 0) = 0; } static constexpr bool CanProvideNormal() { return true; } /**< return true if this type of element has a well defined normal */ - static constexpr unsigned int size = 1U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ - static constexpr unsigned int spaceSize = 2U; /**< Dimensionality of the ambient space */ + static constexpr unsigned int size = 1U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ + static constexpr unsigned int spaceSize = 2U; /**< Dimensionality of the ambient space */ /** Function to resize the containers to store the results */ template static void ResizeContainers(JT& Jacobian, IJT& Jinv) { Jacobian.resize(1, 2); Jinv.resize(1, 2); } - static MatrixD22 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD22 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD22 hinv; CMuscatFloat det = 0.; inv22(A, hinv, det); - return hinv * b; + return hinv * b; } - static MatrixD11 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD11 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD11 res; - res << b(0,0)/A(0,0); - return res ; + res << b(0, 0) / A(0, 0); + return res; } }; @@ -150,7 +150,7 @@ class SpaceDimElementDim<3, 1> { public: /** Compute and return the Jacobian, det(jacobian) and the inverse of the Jacobian * Warning, the jacobian (and it inverse) are not necessarily square matrices - */ + */ template static void InverseJacobianDeterminant(const AnyEigenArrayA& valdphidxi, const AnyEigenArrayB& xcoor, JT& Jacobian, double& Jdet, IJT& Jinv) { Jacobian = valdphidxi.template topRows<1>() * xcoor; @@ -161,9 +161,9 @@ class SpaceDimElementDim<3, 1> { MatrixD31 N0; MatrixD31 N1; if (der0 == 0 && der1 == 0) { - Jinv(0,0) = 0; - Jinv(1,0) = 0; - Jinv(2,0) = 1/der2; + Jinv(0, 0) = 0; + Jinv(1, 0) = 0; + Jinv(2, 0) = 1 / der2; } else { N1 << 0, 0, 1; N0 = Jacobian.row(0).template head<3>().cross(N1); @@ -187,24 +187,24 @@ class SpaceDimElementDim<3, 1> { Normal /= Normal.norm(); } static constexpr bool CanProvideNormal() { return true; } /**< return true if this type of element has a well defined normal */ - static constexpr unsigned int size = 1U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ - static constexpr unsigned int spaceSize = 3U; /**< Dimensionality of the ambient space */ + static constexpr unsigned int size = 1U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ + static constexpr unsigned int spaceSize = 3U; /**< Dimensionality of the ambient space */ /** Function to resize the containers to store the results */ template static void ResizeContainers(JT& Jacobian, IJT& Jinv) { Jacobian.resize(1, 3); Jinv.resize(1, 3); } - static MatrixD31 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD31 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD33 hinv; CMuscatFloat det = 0.; inv33(A, hinv, det); - return hinv * b; + return hinv * b; } - static MatrixD11 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD11 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD11 res; - res << b(0,0)/A(0,0); - return res ; + res << b(0, 0) / A(0, 0); + return res; } }; @@ -214,7 +214,7 @@ class SpaceDimElementDim<2, 2> { public: /** Compute and return the Jacobian, det(jacobian) and the inverse of the Jacobian * Warning, the jacobian (and it inverse) are not necessarily square matrices - */ + */ template static void InverseJacobianDeterminant(const AnyEigenArrayA& valdphidxi, const AnyEigenArrayB& xcoor, JT& Jacobian, double& Jdet, IJT& Jinv) { Jacobian = valdphidxi.template topRows<2>() * xcoor; @@ -224,25 +224,25 @@ class SpaceDimElementDim<2, 2> { template static void Normal(JT& Jacobian, MatrixD31& Normal) {} static constexpr bool CanProvideNormal() { return false; } /**< return true if this type of element has a well defined normal */ - static constexpr unsigned int size = 2U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ - static constexpr unsigned int spaceSize = 2U; /**< Dimensionality of the ambient space */ + static constexpr unsigned int size = 2U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ + static constexpr unsigned int spaceSize = 2U; /**< Dimensionality of the ambient space */ /** Function to resize the containers to store the results */ template static void ResizeContainers(JT& Jacobian, IJT& Jinv) { Jacobian.resize(2, 2); Jinv.resize(2, 2); } - static MatrixD21 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD21 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD22 hinv; CMuscatFloat det = 0.; inv22(A, hinv, det); - return hinv * b; + return hinv * b; } - static MatrixD21 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD21 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD22 hinv; CMuscatFloat det = 0.; inv22(A, hinv, det); - return hinv * b; + return hinv * b; } }; @@ -251,7 +251,7 @@ class SpaceDimElementDim<3, 2> { public: /** Compute and return the Jacobian, det(jacobian) and the inverse of the Jacobian * Warning, the jacobian (and it inverse) are not necessarily square matrices - */ + */ template static void InverseJacobianDeterminant(const AnyEigenArrayA& valdphidxi, const AnyEigenArrayB& xcoor, JT& Jacobian, double& Jdet, IJT& Jinv) { Jacobian = valdphidxi.template topRows<2>() * xcoor; @@ -271,25 +271,25 @@ class SpaceDimElementDim<3, 2> { Normal /= Normal.norm(); } static constexpr bool CanProvideNormal() { return true; } /**< return true if this type of element has a well defined normal */ - static constexpr unsigned int size = 2U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ - static constexpr unsigned int spaceSize = 3U; /**< Dimensionality of the ambient space */ + static constexpr unsigned int size = 2U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ + static constexpr unsigned int spaceSize = 3U; /**< Dimensionality of the ambient space */ /** Function to resize the containers to store the results */ template static void ResizeContainers(JT& Jacobian, IJT& Jinv) { Jacobian.resize(2, 3); Jinv.resize(2, 3); } - static MatrixD31 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD31 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD33 hinv; CMuscatFloat det = 0.; inv33(A, hinv, det); - return hinv * b; + return hinv * b; } - static MatrixD21 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD21 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD22 hinv; CMuscatFloat det = 0.; inv22(A, hinv, det); - return hinv * b; + return hinv * b; } }; //-------------- 3D elements --------------- @@ -298,7 +298,7 @@ class SpaceDimElementDim<3, 3> { public: /** Compute and return the Jacobian, det(jacobian) and the inverse of the Jacobian * Warning, the jacobian (and it inverse) are not necessarily square matrices - */ + */ template static void InverseJacobianDeterminant(const AnyEigenArrayA& valdphidxi, const AnyEigenArrayB& xcoor, JT& Jacobian, double& Jdet, IJT& Jinv) { Jacobian = valdphidxi.template topRows<3>() * xcoor; @@ -308,25 +308,25 @@ class SpaceDimElementDim<3, 3> { template static void Normal(JT& Jacobian, MatrixD31& Normal) {} static constexpr bool CanProvideNormal() { return false; } /**< return true if this type of element has a well defined normal */ - static constexpr unsigned int size = 3U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ - static constexpr unsigned int spaceSize = 3U; /**< Dimensionality of the ambient space */ + static constexpr unsigned int size = 3U; /**< number of lines in the jacobian matrix (cols of the Jinv) */ + static constexpr unsigned int spaceSize = 3U; /**< Dimensionality of the ambient space */ template /** Function to resize the containers to store the results */ static void ResizeContainers(JT& Jacobian, IJT& Jinv) { Jacobian.resize(3, 3); Jinv.resize(3, 3); } - static MatrixD31 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD31 SpaceDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD33 hinv; CMuscatFloat det = 0.; inv33(A, hinv, det); - return hinv * b; + return hinv * b; } - static MatrixD31 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b){ + static MatrixD31 ElemDimSolve(const MatrixDDD& A, const MatrixDDD b) { MatrixD33 hinv; CMuscatFloat det = 0.; inv33(A, hinv, det); - return hinv * b; + return hinv * b; } }; diff --git a/cpp_src/LinAlg/Kokkos/BasicOperationsKokkos.h b/cpp_src/LinAlg/Kokkos/BasicOperationsKokkos.h index 90e3fd5babed0cb7d053635e8b1b28b9070895d9..572045e6361a0a766f4f8b37ebd231e33f9300f3 100644 --- a/cpp_src/LinAlg/Kokkos/BasicOperationsKokkos.h +++ b/cpp_src/LinAlg/Kokkos/BasicOperationsKokkos.h @@ -3,6 +3,8 @@ // file 'LICENSE.txt', which is part of this source code package. // +#pragma once + #include #include @@ -12,12 +14,12 @@ namespace Muscat::KK { template KOKKOS_INLINE_FUNCTION void JacobianComputationIndirect(T1& Jacobian, T2& valdphidxi, T3& nodes, T4& connectivity) { - Eigen::Map< Eigen::Matrix, Eigen::Aligned8, Eigen::Stride<3,1> > JacobianE(&Jacobian(0, 0),size, spaceSize); - Eigen::Map< Eigen::Matrix, Eigen::Aligned8 > valdphidxiE(&valdphidxi(0, 0), size, connectivity.extent(0)); - Eigen::Map, Eigen::Aligned8 > connectivityE(&connectivity(0), connectivity.extent(0)); - Eigen::Map, Eigen::Aligned8, Eigen::Stride > nodesE(nodes.data(), nodes.extent(0), spaceSize ); - //const auto xcoor = nodesE(connectivityE, Eigen::seqN(0,spaceSize)); // change this Eigen::all - //JacobianE = valdphidxiE.template topRows() * xcoor; + Eigen::Map, Eigen::Aligned8, Eigen::Stride<3, 1> > JacobianE(&Jacobian(0, 0), size, spaceSize); + Eigen::Map, Eigen::Aligned8> valdphidxiE(&valdphidxi(0, 0), size, connectivity.extent(0)); + Eigen::Map, Eigen::Aligned8> connectivityE(&connectivity(0), connectivity.extent(0)); + Eigen::Map, Eigen::Aligned8, Eigen::Stride > nodesE(nodes.data(), nodes.extent(0), spaceSize); + // const auto xcoor = nodesE(connectivityE, Eigen::seqN(0,spaceSize)); // change this Eigen::all + // JacobianE = valdphidxiE.template topRows() * xcoor; // implementation only with kokkos view for (long unsigned int i = 0; i < size; ++i) { @@ -38,36 +40,29 @@ KOKKOS_INLINE_FUNCTION void inv22TEMPLATE(T1& m, T2& minv, T3& det) { minv(1, 0) = -m(1, 0) / det(); minv(0, 1) = -m(0, 1) / det(); minv(1, 1) = m(0, 0) / det(); + return; +}; + +template +KOKKOS_INLINE_FUNCTION void inv22TEMPLATE(T1& m, T2& minv) { + minv = m.inverse(); + return; }; template KOKKOS_INLINE_FUNCTION void inv33TEMPLATE(T1 m, T2 minv, T3 det) { - - Eigen::Map< Eigen::Matrix, Eigen::Aligned8, Eigen::Stride<3,1> > mE(&m(0, 0),3, 3); - Eigen::Map< Eigen::Matrix, Eigen::Aligned8, Eigen::Stride<3,1> > minvE(&minv(0, 0),3, 3); + Eigen::Map, Eigen::Aligned8, Eigen::Stride<3, 1> > mE(&m(0, 0), 3, 3); + Eigen::Map, Eigen::Aligned8, Eigen::Stride<3, 1> > minvE(&minv(0, 0), 3, 3); minvE = mE.inverse(); - det() = mE.determinant() ; + det() = mE.determinant(); return; +}; - // implementation only with kokkos view - // minv(0, 0) = (m(1, 1) * m(2, 2) - m(2, 1) * m(1, 2)); - // minv(0, 1) = (m(0, 2) * m(2, 1) - m(0, 1) * m(2, 2)); - // minv(0, 2) = (m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1)); - // minv(1, 0) = (m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2)); - // minv(1, 1) = (m(0, 0) * m(2, 2) - m(0, 2) * m(2, 0)); - // minv(1, 2) = (m(1, 0) * m(0, 2) - m(0, 0) * m(1, 2)); - // minv(2, 0) = (m(1, 0) * m(2, 1) - m(2, 0) * m(1, 1)); - // minv(2, 1) = (m(2, 0) * m(0, 1) - m(0, 0) * m(2, 1)); - // minv(2, 2) = (m(0, 0) * m(1, 1) - m(1, 0) * m(0, 1)); - - // det() = m(0, 0) * minv(0, 0) + m(0, 1) * minv(1, 0) + m(0, 2) * minv(2, 0); - - // for (size_t i = 0; i < 3; i++) { - // for (size_t j = 0; j < 3; j++) { - // minv(i, j) /= det(); - // } - // } +template +KOKKOS_INLINE_FUNCTION void inv33TEMPLATE(T1 m, T2 minv) { + minv = m.inverse(); + return; }; namespace ElementHelper { @@ -88,7 +83,13 @@ class SpaceDimElementDim { Jacobian(0, 0) = 1.; Jinv(0, 0) = 1.; return; - }; + } + template + KOKKOS_INLINE_FUNCTION static MatrixD11 ElemDimSolve(const T1& A, const T2& b) { + MatrixD11 res; + res << b(0, 0) / A(0, 0); + return res; + } }; //-------------- 1D elements --------------- template @@ -105,7 +106,13 @@ class SpaceDimElementDim { Jinv(0, 0) = 1 / Jdet(); ; return; - }; + } + template + KOKKOS_INLINE_FUNCTION static MatrixD11 ElemDimSolve(const T1& A, const T2& b) { + MatrixD11 res; + res << b(0, 0) / A(0, 0); + return res; + } }; template @@ -141,7 +148,13 @@ class SpaceDimElementDim { Jacobian(2, 0) = Jacobian(0, 1) / Jdet(); Jacobian(2, 1) = -Jacobian(0, 0) / Jdet(); Jacobian(2, 2) = 0; - }; + } + template + KOKKOS_INLINE_FUNCTION static MatrixD11 ElemDimSolve(const T1& A, const T2& b) { + MatrixD11 res; + res << b(0, 0) / A(0, 0); + return res; + } }; template @@ -199,6 +212,12 @@ class SpaceDimElementDim { inv33TEMPLATE(Jacobian, Jinv, Jdet); } } + template + KOKKOS_INLINE_FUNCTION static MatrixD11 ElemDimSolve(const T1& A, const T2& b) { + MatrixD11 res; + res << b(0, 0) / A(0, 0); + return res; + } }; //-------------- 2D elements --------------- @@ -214,7 +233,13 @@ class SpaceDimElementDim { JacobianComputationIndirect(Jacobian, valdphidxi, nodes, connectivity); inv22TEMPLATE(Jacobian, Jinv, Jdet); return; - }; + } + template + KOKKOS_INLINE_FUNCTION static MatrixD21 ElemDimSolve(const T1& A, const T2& b) { + MatrixD22 hinv; + inv22TEMPLATE(A, hinv); + return hinv * b; + } }; template @@ -244,7 +269,13 @@ class SpaceDimElementDim { Jacobian(2, 1) /= Jdet(); Jacobian(2, 2) /= Jdet(); inv33TEMPLATE(Jacobian, Jinv, Jdet); - }; + } + template + KOKKOS_INLINE_FUNCTION static MatrixD21 ElemDimSolve(const T1& A, const T2& b) { + MatrixD22 hinv; + inv22TEMPLATE(A, hinv); + return hinv * b; + } }; //-------------- 3D elements --------------- template @@ -258,7 +289,11 @@ class SpaceDimElementDim { KOKKOS_INLINE_FUNCTION static void InverseJacobianDeterminantIndirect(T1 valdphidxi, T2 nodes, T3 connectivity, T4 Jacobian, T5 Jdet, T6 Jinv) { JacobianComputationIndirect(Jacobian, valdphidxi, nodes, connectivity); inv33TEMPLATE(Jacobian, Jinv, Jdet); - }; + } + template + KOKKOS_INLINE_FUNCTION static MatrixD31 ElemDimSolve(const T1& A, const T2& b) { + return A.inverse() * b; + } }; } // namespace ElementHelper diff --git a/cpp_src/LinAlg/Kokkos/Utils.hpp b/cpp_src/LinAlg/Kokkos/Utils.hpp index 03ed9239edbc5e97bec990f2339ab3e1ed67220d..e650b7f4e2dbbf97f7eba08ed5dc135c2d630680 100644 --- a/cpp_src/LinAlg/Kokkos/Utils.hpp +++ b/cpp_src/LinAlg/Kokkos/Utils.hpp @@ -21,7 +21,7 @@ namespace Muscat::KK { template std::pair GetUniqueRows(MapMatrixIDD inMat, MapMatrixID1 outIndices, CMuscatIndex pivot) { - using MemSpace = typename ExecSpace::memory_space; + using MemorySpace = typename ExecSpace::memory_space; CMuscatIndex* in = inMat.data(); CMuscatIndex* indices = outIndices.data(); @@ -43,7 +43,7 @@ std::pair GetUniqueRows(MapMatrixIDD inMat, MapMatri A_h(in, size, row_size); // Create A_d, its device equivalent auto A_d = Kokkos::create_mirror( - Kokkos::view_alloc(MemSpace(), Kokkos::WithoutInitializing), A_h); + Kokkos::view_alloc(MemorySpace(), Kokkos::WithoutInitializing), A_h); Kokkos::deep_copy(A_d, A_h); Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("Sort rows"); @@ -62,7 +62,7 @@ std::pair GetUniqueRows(MapMatrixIDD inMat, MapMatri using ViewType = decltype(A_col_0); using CompType = Kokkos::BinOp1D; - using DeviceType = Kokkos::Device; + using DeviceType = Kokkos::Device; const auto [min_val, max_val] = std::minmax_element(in, in + size * row_size); diff --git a/cpp_src/MeshContainers/ElementUtilities.h b/cpp_src/MeshContainers/ElementUtilities.h index 97ea2073047de65cb25a1179fd852bcf84540b2c..fe663c8cd0d0bf950dcc44146b6e9be9129f1608 100644 --- a/cpp_src/MeshContainers/ElementUtilities.h +++ b/cpp_src/MeshContainers/ElementUtilities.h @@ -1,34 +1,41 @@ #pragma once #include +#include #include namespace Muscat { +/** Tolerance to check if barycentric coord is inside a reference finite element*/ +constexpr double _EPS_BARY = 1.0e-5; + +// TODO Ramzi: replace with KokkosClamp +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) + template -void ClampComplex(MatrixD31& coord) { +KOKKOS_INLINE_FUNCTION void ClampComplex(MatrixD31& coord) { for (int i = 0; i < dim; ++i) { - coord(i, 0) = std::clamp(coord(i, 0), 0.0, 1.0); + coord(i, 0) = MAX(MIN(coord(i, 0), 1.0), 0.0); } } - template -void ClampParamCoordinatesGeo(MatrixD31& coord){ - throw "not implemented in the field transfer"; +KOKKOS_INLINE_FUNCTION void ClampParamCoordinatesGeo(MatrixD31& coord){ + Kokkos::abort("not implemented in the field transfer"); }; template<> -void ClampParamCoordinatesGeo(MatrixD31& coord){} +KOKKOS_INLINE_FUNCTION void ClampParamCoordinatesGeo(MatrixD31& coord){} template<> -void ClampParamCoordinatesGeo(MatrixD31& coord){ +KOKKOS_INLINE_FUNCTION void ClampParamCoordinatesGeo(MatrixD31& coord){ ClampComplex<1>(coord); } template<> -void ClampParamCoordinatesGeo(MatrixD31& coord){ +KOKKOS_INLINE_FUNCTION void ClampParamCoordinatesGeo(MatrixD31& coord){ if (coord(0, 0) + coord(1, 0) > 1) { const double dif = coord(0, 0) - coord(1, 0); coord(0, 0) = (1.0 + dif) / 2.0; @@ -38,12 +45,12 @@ void ClampParamCoordinatesGeo(MatrixD31& coord){ } template<> -void ClampParamCoordinatesGeo(MatrixD31& coord){ +KOKKOS_INLINE_FUNCTION void ClampParamCoordinatesGeo(MatrixD31& coord){ ClampComplex<2>(coord); } template<> -void ClampParamCoordinatesGeo(MatrixD31& coord){ +KOKKOS_INLINE_FUNCTION void ClampParamCoordinatesGeo(MatrixD31& coord){ const double s = coord(0, 0) + coord(1, 0) + coord(2, 0); if (s > 1) { coord.array() += (1.0 - s) / 3.0; @@ -52,7 +59,7 @@ void ClampParamCoordinatesGeo(MatrixD31& coord){ } template<> -void ClampParamCoordinatesGeo(MatrixD31& coord){ +KOKKOS_INLINE_FUNCTION void ClampParamCoordinatesGeo(MatrixD31& coord){ if (coord(0, 0) + coord(1, 0) > 1.0) { const double dif = coord(0, 0) - coord(1, 0); coord(0, 0) = (1.0 + dif) / 2.0; @@ -62,15 +69,15 @@ void ClampParamCoordinatesGeo(MatrixD31& coord){ } template<> -void ClampParamCoordinatesGeo(MatrixD31& coord){ +KOKKOS_INLINE_FUNCTION void ClampParamCoordinatesGeo(MatrixD31& coord){ ClampComplex<3>(coord); } -MatrixD31 ClampParamCoordinates(const GeoSupport& geoSupport, MatrixD31 coord) { +KOKKOS_INLINE_FUNCTION MatrixD31 ClampParamCoordinates(const GeoSupport& geoSupport, MatrixD31 coord) { switch(geoSupport){ case GeoSupport::GeoNA:{ - throw "call ClampParamCoordinates with GeoSupport::GeoNA is not supported!!!!"; + Kokkos::abort("call ClampParamCoordinates with GeoSupport::GeoNA is not supported!!!!"); } case GeoSupport::GeoPoint:{ ClampParamCoordinatesGeo(coord); } case GeoSupport::GeoBar :{ @@ -88,10 +95,96 @@ MatrixD31 ClampParamCoordinates(const GeoSupport& geoSupport, MatrixD31 coord) { } case GeoSupport::GeoHex :{ ClampParamCoordinatesGeo(coord); } default:{ - throw "call ClampParamCoordinates with an unknow GeoSupport is not supported!!!!"; + Kokkos::abort("call ClampParamCoordinates with an unknow GeoSupport is not supported!!!!"); } } return coord; } +template +KOKKOS_INLINE_FUNCTION bool IsBaryCoordInsideGeo(MatrixD31& coord){ + Kokkos::abort("not implemented in the field transfer"); + return false; +}; + +template<> +KOKKOS_INLINE_FUNCTION bool IsBaryCoordInsideGeo(MatrixD31& coord){ + return true; +} + +template<> +KOKKOS_INLINE_FUNCTION bool IsBaryCoordInsideGeo(MatrixD31& coord){ + return ( Kokkos::abs(coord(0, 0) - 0.5) < 0.5 + _EPS_BARY ); +} + +template<> +KOKKOS_INLINE_FUNCTION bool IsBaryCoordInsideGeo(MatrixD31& coord){ + return ( coord(0, 0) + _EPS_BARY > 0. ) && + ( coord(1, 0) + _EPS_BARY > 0. ) && + ( coord(0, 0) + coord(1, 0) < 1.0 + _EPS_BARY ); +} + +template<> +KOKKOS_INLINE_FUNCTION bool IsBaryCoordInsideGeo(MatrixD31& coord){ + return ( Kokkos::abs(coord(0, 0) - 0.5) < 0.5 + _EPS_BARY ) && + ( Kokkos::abs(coord(1, 0) - 0.5) < 0.5 + _EPS_BARY ); +} +template<> +KOKKOS_INLINE_FUNCTION bool IsBaryCoordInsideGeo(MatrixD31& coord){ + return ( coord(0, 0) + _EPS_BARY > 0. ) && + ( coord(1, 0) + _EPS_BARY > 0. ) && + ( coord(2, 0) + _EPS_BARY > 0. ) && + ( coord(0, 0) + coord(1, 0) + coord(2, 0) < 1.0 + _EPS_BARY ); +} + +template<> +KOKKOS_INLINE_FUNCTION bool IsBaryCoordInsideGeo(MatrixD31& coord){ + const auto s = 1.0 - coord(2, 0); + return ( Kokkos::abs(coord(0, 0) - 0.5) < 0.5*s + _EPS_BARY ) && + ( Kokkos::abs(coord(1, 0) - 0.5) < 0.5*s + _EPS_BARY ) && + ( Kokkos::abs(coord(2, 0) - 0.5) < 0.5 + _EPS_BARY ); +} + +template<> +KOKKOS_INLINE_FUNCTION bool IsBaryCoordInsideGeo(MatrixD31& coord){ + return ( coord(0, 0) + _EPS_BARY > 0. ) && + ( coord(1, 0) + _EPS_BARY > 0. ) && + ( coord(0, 0) + coord(1, 0) < 1.0 + _EPS_BARY ) && + ( Kokkos::abs(coord(2, 0) - 0.5) < 0.5 + _EPS_BARY ); +} + +template<> +KOKKOS_INLINE_FUNCTION bool IsBaryCoordInsideGeo(MatrixD31& coord){ + return ( Kokkos::abs(coord(0, 0) - 0.5) < 0.5 + _EPS_BARY ) && + ( Kokkos::abs(coord(1, 0) - 0.5) < 0.5 + _EPS_BARY ) && + ( Kokkos::abs(coord(2, 0) - 0.5) < 0.5 + _EPS_BARY ); +} + +KOKKOS_INLINE_FUNCTION bool IsBaryCoordInside(const GeoSupport& geoSupport, MatrixD31 coord) { + + switch(geoSupport){ + case GeoSupport::GeoNA:{ + Kokkos::abort("call IsBaryCoordInside with GeoSupport::GeoNA is not supported!!!!"); + } case GeoSupport::GeoPoint:{ + return IsBaryCoordInsideGeo(coord); + } case GeoSupport::GeoBar :{ + return IsBaryCoordInsideGeo(coord); + } case GeoSupport::GeoTri :{ + return IsBaryCoordInsideGeo(coord); + } case GeoSupport::GeoQuad :{ + return IsBaryCoordInsideGeo(coord); + } case GeoSupport::GeoTet :{ + return IsBaryCoordInsideGeo(coord); + } case GeoSupport::GeoPyr :{ + return IsBaryCoordInsideGeo(coord); + } case GeoSupport::GeoWed :{ + return IsBaryCoordInsideGeo(coord); + } case GeoSupport::GeoHex :{ + return IsBaryCoordInsideGeo(coord); + } default:{ + Kokkos::abort("call IsBaryCoordInside with an unknown GeoSupport is not supported!!!!"); + } + } +} + } // namespace Muscat diff --git a/cpp_src/MeshTools/TransferBackEnds/FieldTransfer.cpp b/cpp_src/MeshTools/TransferBackEnds/FieldTransfer.cpp index 4feca70319b944f57942c5a8fa1c269e25ac83fc..f7d8e3c6ab98b903cd0744a6ab293aa213f92e99 100644 --- a/cpp_src/MeshTools/TransferBackEnds/FieldTransfer.cpp +++ b/cpp_src/MeshTools/TransferBackEnds/FieldTransfer.cpp @@ -132,7 +132,6 @@ void ComputeBarycentricCoordinateSpaceDimOnElement(const Eigen::Matrix dN; - dN.resize(geoStruct::dimensionality, geoStruct::numberOfShapeFunctions); Eigen::Matrix dfNum; Eigen::Matrix h; diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/ComputeSkin/ComputeLinearSkin.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/ComputeSkin/ComputeLinearSkin.hxx new file mode 100644 index 0000000000000000000000000000000000000000..eebd9c86ff72b68e187f0fcdd9bd1d71fe598d06 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/ComputeSkin/ComputeLinearSkin.hxx @@ -0,0 +1,295 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +/** + * This file is part of the Field Transfer process. It handles the creation of the skin of the mesh. + * Skin here should be understood as the faces of the mesh that are not shared by 2 distinct elements. + * The term "face" refers to the d - 1 base elements (3D: triangle, 2D: bars, 1D: points). + */ + +#pragma once + +#include + +namespace Muscat::KK::FieldTransfer { + +template +class permute; + +template +class permute { + public: + KOKKOS_INLINE_FUNCTION static void permuteCol(T kokkos_sv) { + return; + } +}; +template +class permute { + public: + KOKKOS_INLINE_FUNCTION static void permuteCol(T kokkos_sv) { + if (kokkos_sv(0) > kokkos_sv(1)) { + Kokkos::kokkos_swap(kokkos_sv(0), kokkos_sv(1)); + } + return; + } +}; +template +class permute { + public: + KOKKOS_INLINE_FUNCTION static void permuteCol(T kokkos_sv) { + if (kokkos_sv(0) > kokkos_sv(1)) { + Kokkos::kokkos_swap(kokkos_sv(0), kokkos_sv(1)); + } + if (kokkos_sv(2) > kokkos_sv(3)) { + Kokkos::kokkos_swap(kokkos_sv(2), kokkos_sv(3)); + } + if (kokkos_sv(0) > kokkos_sv(2)) { + Kokkos::kokkos_swap(kokkos_sv(0), kokkos_sv(2)); + } + if (kokkos_sv(1) > kokkos_sv(3)) { + Kokkos::kokkos_swap(kokkos_sv(1), kokkos_sv(3)); + } + if (kokkos_sv(1) > kokkos_sv(2)) { + Kokkos::kokkos_swap(kokkos_sv(1), kokkos_sv(2)); + } + return; + } +}; + +/** + * @brief Compute linear skin algorithm, fills transfer->skinFaces and transfer->skinParents. + * @param[in] transfer + */ +template +void ComputeLinearSkin(TransferClassKokkos* transfer) { + Kokkos::Profiling::pushRegion("Compute Linear Skin"); + + using MemorySpace = typename ExecSpace::memory_space; + ExecSpace execSpace{}; + + // TODO: Rewrite -> Ugly ! + auto LFPE_val_vec = getLinearFacesPaddedEntries(spaceDimension); + auto LFPE_val = Kokkos::View>(LFPE_val_vec.data(), LFPE_val_vec.size()); + auto linearFacesPadded_val = Kokkos::create_mirror_view_and_copy(execSpace, LFPE_val); + auto LFPE_off_vec = getLinearFacesPaddedOffsets(spaceDimension); + auto LFPE_off = Kokkos::View>(LFPE_off_vec.data(), LFPE_off_vec.size()); + auto linearFacesPadded_off = Kokkos::create_mirror_view_and_copy(execSpace, LFPE_off); + + auto TF_val_vec = getTriFacesEntries(spaceDimension); + auto TF_val = Kokkos::View>(TF_val_vec.data(), TF_val_vec.size()); + auto TriFaces_val = Kokkos::create_mirror_view_and_copy(execSpace, TF_val); + auto TF_off_vec = getTriFacesOffsets(spaceDimension); + auto TF_off = Kokkos::View>(TF_off_vec.data(), TF_off_vec.size()); + auto TriFaces_off = Kokkos::create_mirror_view_and_copy(execSpace, TF_off); + + auto TLF_val_vec = getTriLocFacesEntries(spaceDimension); + auto TLF_val = Kokkos::View>(TLF_val_vec.data(), TLF_val_vec.size()); + auto TriLocFaces_val = Kokkos::create_mirror_view_and_copy(execSpace, TLF_val); + auto TLF_off_vec = getTriLocFacesOffsets(spaceDimension); + auto TLF_off = Kokkos::View>(TLF_off_vec.data(), TLF_off_vec.size()); + auto TriLocFaces_off = Kokkos::create_mirror_view_and_copy(execSpace, TLF_off); + + auto nbNode = transfer->nodes.extent(0); + auto nbLinFaces = transfer->nbLinearSkinFaces; + auto elementTypesPtr = transfer->elementTypes; + auto connValPtr = transfer->connectivity_values; + auto connOffPtr = transfer->connectivity_offsets; + + auto nbElem = elementTypesPtr.extent(0); + + int nodesPerFace = -1; + switch (spaceDimension) { + case 0: + nodesPerFace = 0; + break; + case 1: + nodesPerFace = 1; + break; + case 2: + nodesPerFace = 2; + break; + case 3: + nodesPerFace = 4; + break; + default: + std::cerr << "Invalid spaceDimension " << spaceDimension << std::endl; + assert(false); + break; + } + + auto linFaces = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "linFaces"), nbLinFaces, nodesPerFace + 1); // last column is elementtype + auto allFaceId = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "allFaceId"), nbLinFaces); // global linface id and local element face id + auto allFaceLocId = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "allFaceLocId"), nbLinFaces); // global linface id and local element face id + auto allParents = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "allParents"), nbLinFaces); + + const CMuscatIndex maxi = std::numeric_limits().max(); + + Kokkos::Profiling::pushRegion("Fill all faces and sort per row permute"); + // Avoid branching but very ugly: to change ! + Kokkos::parallel_scan( + "Fill all faces and sort per row : Col permutation", + Kokkos::RangePolicy(execSpace, 0, nbElem), + KOKKOS_LAMBDA(int64_t i, int64_t& partial_sum, bool is_final) { + auto ielType = (int)elementTypesPtr(i); + auto facesPadded = Kokkos::subview( + linearFacesPadded_val, Kokkos::make_pair(linearFacesPadded_off(ielType), linearFacesPadded_off(ielType + 1))); + auto localnbFaces = facesPadded.extent(0) / nodesPerFace; + auto firstIndex = partial_sum; + partial_sum += localnbFaces; + if (is_final) { + auto localConnectivity = Kokkos::subview(connValPtr, Kokkos::make_pair(connOffPtr(i), connOffPtr(i + 1))); + for (int64_t j = 0; j < localnbFaces; j++) { // loop on element faces + auto linFaces_sv = Kokkos::subview(linFaces, firstIndex + j, Kokkos::ALL); + allParents(firstIndex + j) = i; + allFaceId(firstIndex + j) = firstIndex + j; + allFaceLocId(firstIndex + j) = j; + for (int64_t k = 0; k < nodesPerFace - 1; k++) { // loop on non-padded face connectivity + auto index = facesPadded(j * nodesPerFace + k); + linFaces_sv(k) = localConnectivity(index); + } + auto index = facesPadded((j + 1) * nodesPerFace - 1); + linFaces_sv(nodesPerFace - 1) = (index == -1) ? maxi : localConnectivity(index); + using subviewType = decltype(linFaces_sv); + permute::permuteCol(linFaces_sv); + linFaces_sv(nodesPerFace) = ielType; + } + } + }); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Create permutation vector to sort linFaces by row"); + auto firstNodeIds = Kokkos::subview(linFaces, Kokkos::ALL(), 0); + + using IdViewType = decltype(firstNodeIds); + using CompType = Kokkos::BinOp1D; + using DeviceType = Kokkos::Device; + + Kokkos::BinSort + bin_sort(firstNodeIds, CompType(nbLinFaces, 0, nbNode), true /*sort within*/); + bin_sort.create_permute_vector(); + auto permutationVector = bin_sort.get_permute_vector(); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Permute and Compute Nbtris"); + + CMuscatIndex nbTris = 0; + Kokkos::parallel_reduce( + "Permute and Compute NbTris", Kokkos::RangePolicy(execSpace, 0, nbLinFaces), + KOKKOS_LAMBDA(const CMuscatIndex& i, CMuscatIndex& sum) { + auto index = permutationVector(i); + + // vector who would be at pos "i" in the (first column)-sorted matrix faces + auto vect = Kokkos::subview(linFaces, index, Kokkos::ALL()); + + CMuscatIndex curr_row_occur = 1; // Number of occurrences of the row (excluding current) + CMuscatIndex curr = i; + // Find the first row starting with vect(0) + while (curr > 0 && linFaces(permutationVector(curr), 0) == vect(0)) { + --curr; + } + // In case curr is 0, we do not increase + if (linFaces(permutationVector(curr), 0) != vect(0)) { + ++curr; + } + // For all elements starting with vect(0) + while (curr <= nbLinFaces - 1 && linFaces(permutationVector(curr), 0) == vect(0)) { + if (curr == i) { // skip current + ++curr; + continue; + } + bool diff = false; + // Check if identical + for (CMuscatIndex t = 1; t < nodesPerFace + 1; ++t) + if (linFaces(permutationVector(curr), t) != vect(t)) { + diff = true; + break; + } + if (!diff) { + ++curr_row_occur; + // if (curr_row_occur == 2) + break; + } + ++curr; + } + if (curr_row_occur == 2) { + allFaceId(index) = -1; + } else { + auto ielType = (int)elementTypesPtr(allParents(index)); + auto iTriLocFaces = Kokkos::subview( + TriLocFaces_val, Kokkos::make_pair(TriLocFaces_off(ielType), TriLocFaces_off(ielType + 1))); + auto iloc = allFaceLocId(index); + sum += (iTriLocFaces(iloc + 1) - iTriLocFaces(iloc)) / spaceDimension; + } + }, + nbTris); + Kokkos::fence(); + + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Remove"); + + // remove the -1 values while saving the relative order of elements (in place) + const auto end = Kokkos::Experimental::remove(execSpace, allFaceId, -1); + const int dist = Kokkos::Experimental::distance(Kokkos::Experimental::begin(allFaceId), end); // number of not -1 + + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Finalize: compute linear skin"); + + auto skinFaces = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "skinFaces"), nbTris, spaceDimension); + auto skinParents = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "skinParents"), nbTris); + + Kokkos::parallel_scan( + "Fill Skin Faces & Parents", Kokkos::RangePolicy(execSpace, 0, dist), + KOKKOS_LAMBDA(int64_t i, int64_t& partial_sum, bool is_final) { + auto index = allFaceId(i); + auto iParent = allParents(index); + auto ielType = (int)elementTypesPtr(iParent); + auto iTriFaces = Kokkos::subview( + TriFaces_val, Kokkos::make_pair(TriFaces_off(ielType), TriFaces_off(ielType + 1))); + auto iTriLocFaces = Kokkos::subview( + TriLocFaces_val, Kokkos::make_pair(TriLocFaces_off(ielType), TriLocFaces_off(ielType + 1))); + auto iloc = allFaceLocId(index); + auto localnbFaces = (iTriLocFaces(iloc + 1) - iTriLocFaces(iloc)) / spaceDimension; + auto firstIndex = partial_sum; + partial_sum += localnbFaces; + if (is_final) { + auto localConnectivity = Kokkos::subview(connValPtr, Kokkos::make_pair(connOffPtr(iParent), connOffPtr(iParent + 1))); + for (int64_t j = 0; j < localnbFaces; j++) { + skinParents(firstIndex + j) = iParent; + auto pos = iTriLocFaces(iloc) + j * spaceDimension; + for (int64_t k = 0; k < spaceDimension; k++) { + const auto iconn = iTriFaces(pos + k); + skinFaces(firstIndex + j, k) = localConnectivity(iconn); + } + } + } + }); + Kokkos::fence(); + transfer->skinFaces = skinFaces; + transfer->skinParents = skinParents; + + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::popRegion(); + + return; +} + +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/FieldTransferKokkos.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/FieldTransferKokkos.hxx new file mode 100644 index 0000000000000000000000000000000000000000..1d6ab5c101d546b0865057ee6eb0d38616188d84 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/FieldTransferKokkos.hxx @@ -0,0 +1,55 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include + +#include "MeshTools/TransferBackEnds/Kokkos/Transfers/FTEntry.hxx" + +namespace Muscat::KK::FieldTransfer { +template +static SparsedMatCOO FieldTransfer(TransferParams& params, Mesh& m, MapMatrixDDD& targetPoints, MapMatrixID1& resultStatus, MapMatrixID1& resultElement) { + ExecSpace execSpace{}; + TransferClassKokkos transfer(params); + + SetupTransferClassKokkos(transfer, m, targetPoints); + SparsedMatCOO res; + res = ComputeFieldTransfer(&transfer); + Kokkos::fence("Wait for sync"); + auto statusPtr = (TransferStatus*)resultStatus.data(); + auto status = Kokkos::View(statusPtr, resultStatus.rows()); + auto elements = Kokkos::View(resultElement.data(), resultElement.rows()); + Kokkos::deep_copy(status, transfer.targetStatus); + Kokkos::deep_copy(elements, transfer.targetElement); + Kokkos::fence(); + return res; +}; + +template +/** + * @brief This function is the main entry for the field transfer algorithm(s). + * @param[in] params The transfer method parameters : dimension of the transfer (3, 2, or 1), transfer method, etc. + * @param[in] m the input mesh which the field will be transferred from. + * @param[in] targetPoints the target points which we want the values. + * @param[in] resultStatus the preallocated eigen matrix which will be filled with the status for each target point. It should be of size nbTargetPoint * 1. + * @param[in] resultElement the preallocated eigen matrix which will be filled with the element result for each target point. The element result is the ID of each element that contains the associated target point. It should be of size nbTargetPoint * 1. + * @return The transfer operator matrix in the shape of a sparse matrix. + */ +SparsedMatCOO FieldTransferWrapper(TransferParams& params, Mesh& m, MapMatrixDDD& targetPoints, MapMatrixID1& resultStatus, MapMatrixID1& resultElement) { + if (params.dimension == 1) { + return FieldTransfer(params, m, targetPoints, resultStatus, resultElement); + } + if (params.dimension == 2) { + return FieldTransfer(params, m, targetPoints, resultStatus, resultElement); + } + if (params.dimension == 3) { + return FieldTransfer(params, m, targetPoints, resultStatus, resultElement); + } + assert(false); + return {}; +} + +}; // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/NewtonKokkos.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/NewtonKokkos.hxx new file mode 100644 index 0000000000000000000000000000000000000000..3e0ae0580f9a53bcedb02e7a937214c35b9ec04e --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/NewtonKokkos.hxx @@ -0,0 +1,106 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include "LinAlg/Kokkos/BasicOperationsKokkos.h" + +namespace Muscat::KK::FieldTransfer { + +/** Tolerance to stop the newton, dxietaphi.squaredNorm()*/ +#define D_XI_ETA_PHI_TOL_SQUARED 1e-10 +/** Maximal number of newton iters */ +#define MAX_NEWTON_ITER 25 + +template +static KOKKOS_INLINE_FUNCTION Eigen::Matrix DdfPart2(const A f, const C& /*dN*/, const D& coordAtDofs, const MatrixD31& xietaphi) { + Eigen::Matrix res; + res.fill(0.0); + Eigen::Matrix temp; + for (int i = 0; i < coordAtDofs.cols(); ++i) { + temp.fill(0.0); + for (int j = 0; j < coordAtDofs.rows(); ++j) { + Eigen::Matrix ddNi; + ddNi.setZero(); + geoStruct::UpdateShapeFunctionsDerDerValues(j, xietaphi(0, 0), xietaphi(1, 0), xietaphi(2, 0), ddNi); + temp += ddNi * coordAtDofs(j, i); + } + res -= f(0, i) * temp; + } + return res; +} + +template +/** + * @brief The Newton method applied to a finite element. The targetPoint is updated to hold the barycentric coordinates ultimately. + * @param[in] Xcoor The coordinates for each of the node of the element. + * @param[in] targetPoint The coordinates of the point in space we are looking for its barycentric coordinate. + * @param[out] val Evaluation of the shape function at the barycentric coordinate. + * @return true if the target point is contained in the finite element. + */ +KOKKOS_FUNCTION bool ApplyNewton(const Eigen::Matrix& Xcoor, Eigen::Matrix& targetPoint, Kokkos::View val, const bool forceEvaluation = false) { + using ExecSpace = MemorySpace::execution_space; + using EH = ElementHelper::SpaceDimElementDim; + MatrixD31 xietaphi; + xietaphi.fill(0.5); + + Eigen::Matrix xcoor; + for (int i = 0; i < geoStruct::numberOfShapeFunctions; i++) { + for (int j = 0; j < spaceDimension; j++) { + xcoor(i, j) = Xcoor(i, j); + } + } + + Eigen::Matrix n; + + int x = 0; + + geoStruct::UpdateShapeFunctionsValues(xietaphi(0, 0), xietaphi(1, 0), xietaphi(2, 0), n); + + for (; x < MAX_NEWTON_ITER; ++x) { + Eigen::Matrix f = n.transpose() * xcoor; + f -= targetPoint; + + Eigen::Matrix dN; + + geoStruct::UpdateShapeFunctionsDerValues(xietaphi(0, 0), xietaphi(1, 0), xietaphi(2, 0), dN); + + Eigen::Matrix dfNum = (f * ((dN * xcoor).transpose())).transpose(); + Eigen::Matrix h = (dN * xcoor) * (dN * xcoor).transpose(); + + if (x > 9) { + h += DdfPart2(-f, dN, xcoor, xietaphi); + } + + Eigen::Matrix dxietaphi = EH::ElemDimSolve(h, dfNum); + + xietaphi.block(0, 0, geoStruct::dimensionality, 1) -= dxietaphi.block(0, 0, geoStruct::dimensionality, 1); + + geoStruct::UpdateShapeFunctionsValues(xietaphi(0, 0), xietaphi(1, 0), xietaphi(2, 0), n); + + if constexpr (geoStruct::linear) { + break; + } + if (dxietaphi.squaredNorm() < D_XI_ETA_PHI_TOL_SQUARED) { + break; + } + } + + if (forceEvaluation) { + for (int i = 0; i < val.extent(0); ++i) { + val(i) = n(i); + } + return true; + } + + if (x < MAX_NEWTON_ITER && Muscat::IsBaryCoordInsideGeo(xietaphi)) { + for (int i = 0; i < val.extent(0); ++i) { + val(i) = n(i); + } + return true; + } + return false; +} +}; // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTEntry.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTEntry.hxx new file mode 100644 index 0000000000000000000000000000000000000000..03bc039140b287b664754114192f019b3120f133 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTEntry.hxx @@ -0,0 +1,54 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include "FTUtils.hxx" + +#include "MeshTools/TransferBackEnds/Kokkos/Transfers/FTNearest.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterNearest.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterZero.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterClamp.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterExtrap.hxx" + +namespace Muscat::KK::FieldTransfer { + +template +/** + * @brief Main entry to the different methods of the field transfer. + * @param[in,out] transfer The transfer class holding informations. Both targetStatus and targetElement will be populated accordingly. + * @return The sparse matrix representing the transfer operator in COO format. + */ +SparsedMatCOO ComputeFieldTransfer(TransferClassKokkos* transfer) { + ExecSpace execSpace{}; + using MemorySpace = typename ExecSpace::memory_space; + auto method = transfer->method; + + SparsedMatCOO res; + + switch (method) { + case TransferMethods::NEAREST: // nearest/nearest + res = FTNearest(transfer); + break; + case TransferMethods::INTER: // interp/nearest + res = FTInterNearest(transfer); + break; + case TransferMethods::ZERO_FILL: // interp/zero_fill + res = FTInterZero(transfer); + break; + case TransferMethods::EXTRAP: // interp/extrap + res = FTInterExtrap(transfer); + break; + case TransferMethods::CLAMP: // interp/clamp + res = FTInterClamp(transfer); + break; + default: + std::cerr << "Invalid transfer method" << std::endl; + assert(false); + break; + } + return res; +} +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterClamp.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterClamp.hxx new file mode 100644 index 0000000000000000000000000000000000000000..b7bb083b53591004e910d56e49230e5ef9deb301 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterClamp.hxx @@ -0,0 +1,100 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include "FTUtils.hxx" +#include "SkinProjection.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/ComputeSkin/ComputeLinearSkin.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/Utils/ArborXCallbacks.hxx" + +namespace Muscat::KK::FieldTransfer { + +template +/** + * @brief Compute the target point finite element interpolation if intersects or assign nearest element interpolation values after projection of the target point if outside all elements. + * @param[in] transfer the transfer class holding informations. + * @return sparse coo matrix containing interpolation coefficient aka field transfer operator + */ +SparsedMatCOO FTInterClamp(TransferClassKokkos* transfer) { + Kokkos::Profiling::pushRegion("FTInterClamp"); + + using MemorySpace = typename ExecSpace::memory_space; + using Box = ArborX::Box; + + ExecSpace execSpace{}; + + auto targetPointsPtr = transfer->targetPoints; + auto targetElemPtr = transfer->targetElement; + auto targetStatusPtr = transfer->targetStatus; + auto elementsTypePtr = transfer->elementTypes; + auto nodesPtr = transfer->nodes; + auto maxConnSizePerElt = transfer->maxConnSizePerElt; + + auto nbtargetPoints = targetPointsPtr.extent(0); + + Kokkos::Profiling::pushRegion("Compute target point FE intersection"); + auto [col2d_d, val2d_d, countEntries] = + ComputeBoxTargetPointIntersection(transfer); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Compute target point projection on triangulated skin mesh"); + ComputeLinearSkin(transfer); + if constexpr (spaceDimension == 1) + ComputeProjectionOn1DSkin(transfer, col2d_d, val2d_d, countEntries, false); + else if constexpr (spaceDimension == 2) + ComputeProjectionOn2DSkin(transfer, col2d_d, val2d_d, countEntries, false); + else if constexpr (spaceDimension == 3) + ComputeProjectionOn3DSkin(transfer, col2d_d, val2d_d, countEntries, false); + else + throw std::runtime_error("Unexpected dimension for projection"); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + // Can we avoid this reduction and branching + int nnz = 0; + Kokkos::Profiling::pushRegion("Get nnz"); + Kokkos::parallel_reduce( + "Compute COO operator functor", + Kokkos::RangePolicy(execSpace, 0, nbtargetPoints), + KOKKOS_LAMBDA(int i, int& nbEntries) { + nbEntries += countEntries(i); + }, + nnz); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + // Scan (assemble) COO operator ! + Kokkos::Profiling::pushRegion("Scan COO operator"); + auto matCOO = SparsedMatCOO(nnz); + auto rows_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "row_d"), nnz); + auto cols_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "col_d"), nnz); + auto vals_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "data_d"), nnz); + Kokkos::parallel_scan( + "Scan COO operator functor", + Kokkos::RangePolicy(execSpace, 0, nbtargetPoints), + KOKKOS_LAMBDA(int64_t i, int64_t& partial_sum, bool is_final) { + auto firstIndex = partial_sum; + partial_sum += countEntries(i); + if (is_final) { + for (int64_t j = 0; j < countEntries(i); j++) { + rows_d(firstIndex + j) = i; + cols_d(firstIndex + j) = col2d_d(i, j); + vals_d(firstIndex + j) = val2d_d(i, j); + } + } + }); + Kokkos::Profiling::popRegion(); + Kokkos::fence(); + + Kokkos::deep_copy(matCOO.rows, rows_d); + Kokkos::deep_copy(matCOO.cols, cols_d); + Kokkos::deep_copy(matCOO.vals, vals_d); + + Kokkos::Profiling::popRegion(); + return matCOO; +} +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterExtrap.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterExtrap.hxx new file mode 100644 index 0000000000000000000000000000000000000000..16202cb768d1b60c55d1cb7b2a5284eafc4768bd --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterExtrap.hxx @@ -0,0 +1,101 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include "FTUtils.hxx" +#include "SkinProjection.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/ComputeSkin/ComputeLinearSkin.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/Utils/ArborXCallbacks.hxx" + +namespace Muscat::KK::FieldTransfer { + +template +/** + * @brief Compute the target point finite element interpolation if intersects or assign nearest element extrapolation values if outside all elements. + * @param[in] transfer the transfer class holding informations. + * @return sparse coo matrix containing interpolation coefficient aka field transfer operator + */ +SparsedMatCOO FTInterExtrap(TransferClassKokkos* transfer) { + Kokkos::Profiling::pushRegion("FTInterExtrap"); + + using MemorySpace = typename ExecSpace::memory_space; + using Box = ArborX::Box; + + ExecSpace execSpace{}; + + auto targetPointsPtr = transfer->targetPoints; + auto targetElemPtr = transfer->targetElement; + auto targetStatusPtr = transfer->targetStatus; + auto elementsTypePtr = transfer->elementTypes; + auto nodesPtr = transfer->nodes; + auto maxConnSizePerElt = transfer->maxConnSizePerElt; + + auto nbtargetPoints = targetPointsPtr.extent(0); + + Kokkos::Profiling::pushRegion("Compute target point FE intersection"); + auto [col2d_d, val2d_d, countEntries] = + ComputeBoxTargetPointIntersection(transfer); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Compute target point point on triangulated skin mesh"); + ComputeLinearSkin(transfer); + if constexpr (spaceDimension == 1) + ComputeProjectionOn1DSkin(transfer, col2d_d, val2d_d, countEntries, true); + else if constexpr (spaceDimension == 2) + ComputeProjectionOn2DSkin(transfer, col2d_d, val2d_d, countEntries, true); + else if constexpr (spaceDimension == 3) + ComputeProjectionOn3DSkin(transfer, col2d_d, val2d_d, countEntries, true); + else + throw std::runtime_error("Unexpected dimension for projection"); + + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + // Can we avoid this reduction and branching + int nnz = 0; + Kokkos::Profiling::pushRegion("Get nnz"); + Kokkos::parallel_reduce( + "Compute COO operator functor", + Kokkos::RangePolicy(execSpace, 0, nbtargetPoints), + KOKKOS_LAMBDA(int i, int& nbEntries) { + nbEntries += countEntries(i); + }, + nnz); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + // Scan (assemble) COO operator ! + Kokkos::Profiling::pushRegion("Scan COO operator"); + auto matCOO = SparsedMatCOO(nnz); + auto rows_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "row_d"), nnz); + auto cols_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "col_d"), nnz); + auto vals_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "data_d"), nnz); + Kokkos::parallel_scan( + "Scan COO operator functor", + Kokkos::RangePolicy(execSpace, 0, nbtargetPoints), + KOKKOS_LAMBDA(int64_t i, int64_t& partial_sum, bool is_final) { + auto firstIndex = partial_sum; + partial_sum += countEntries(i); + if (is_final) { + for (int64_t j = 0; j < countEntries(i); j++) { + rows_d(firstIndex + j) = i; + cols_d(firstIndex + j) = col2d_d(i, j); + vals_d(firstIndex + j) = val2d_d(i, j); + } + } + }); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + Kokkos::deep_copy(matCOO.rows, rows_d); + Kokkos::deep_copy(matCOO.cols, cols_d); + Kokkos::deep_copy(matCOO.vals, vals_d); + + Kokkos::Profiling::popRegion(); + return matCOO; +} +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterNearest.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterNearest.hxx new file mode 100644 index 0000000000000000000000000000000000000000..3de85f71da6a80ec899a5a9d6e514d83ce3ef11d --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterNearest.hxx @@ -0,0 +1,106 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include "FTUtils.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/Utils/ArborXCallbacks.hxx" + +namespace Muscat::KK::FieldTransfer { + +template +/** + * @brief Compute the target point finite element interpolation if intersects or assign nearest node value if outside all elements. + * @param[in] transfer the transfer class holding informations. + * @return sparse coo matrix containing interpolation coefficient aka field transfer operator + */ +SparsedMatCOO FTInterNearest(TransferClassKokkos* transfer) { + Kokkos::Profiling::pushRegion("FTInterNearest"); + + using MemorySpace = typename ExecSpace::memory_space; + using Box = ArborX::Box; + + ExecSpace execSpace{}; + + auto targetPointsPtr = transfer->targetPoints; + auto targetElemPtr = transfer->targetElement; + auto targetStatusPtr = transfer->targetStatus; + auto elementsTypePtr = transfer->elementTypes; + auto nodesPtr = transfer->nodes; + auto maxConnSizePerElt = transfer->maxConnSizePerElt; + + auto nbtargetPoints = targetPointsPtr.extent(0); + + ArborX::BoundingVolumeHierarchy bvhPoints(execSpace, ArborX::Experimental::attach_indices(nodesPtr)); + Kokkos::Profiling::pushRegion("Compute nearest points"); + auto nearestPointValues = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "nearestPointValues"), 0); + auto nearestPointOffsets = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "nearestPointOffsets"), 0); + PointCloudNearest pcn{targetPointsPtr}; + bvhPoints.query(execSpace, pcn, ExtractIndex{}, nearestPointValues, nearestPointOffsets); + Kokkos::Profiling::popRegion(); + + Kokkos::Profiling::pushRegion("Compute target point FE intersection"); + auto [col2d_d, val2d_d, countEntries] = + ComputeBoxTargetPointIntersection(transfer); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + // Can we avoid this reduction and branching ? However, it is not a hotspot + int nnz = 0; + Kokkos::Profiling::pushRegion("Get nnz"); + Kokkos::parallel_reduce( + "Compute COO operator functor", + Kokkos::RangePolicy(execSpace, 0, nbtargetPoints), + KOKKOS_LAMBDA(int i, int& nbEntries) { + if (targetStatusPtr(i) == TransferStatus::INTER) { + nbEntries += countEntries(i); + } else { // get nearest point + nbEntries++; + col2d_d(i, 0) = nearestPointValues(i); + countEntries(i) = 1; + targetStatusPtr(i) = TransferStatus::NEAREST; + targetElemPtr(i) = nearestPointValues(i); + val2d_d(i, 0) = 1.0; + } + }, + nnz); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + // Scan (assemble) COO operator ! + Kokkos::Profiling::pushRegion("Scan COO operator"); + auto matCOO = SparsedMatCOO(nnz); + auto rows_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "row_d"), nnz); + auto cols_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "col_d"), nnz); + auto vals_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "data_d"), nnz); + Kokkos::parallel_scan( + "Scan COO operator functor", + Kokkos::RangePolicy(execSpace, 0, nbtargetPoints), + KOKKOS_LAMBDA(int64_t i, int64_t& partial_sum, bool is_final) { + auto firstIndex = partial_sum; + partial_sum += countEntries(i); + if (is_final) { + for (int64_t j = 0; j < countEntries(i); j++) { + rows_d(firstIndex + j) = i; + cols_d(firstIndex + j) = col2d_d(i, j); + vals_d(firstIndex + j) = val2d_d(i, j); + } + } + }); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + Kokkos::deep_copy(matCOO.rows, rows_d); + Kokkos::deep_copy(matCOO.cols, cols_d); + Kokkos::deep_copy(matCOO.vals, vals_d); + + Kokkos::fence(); + + Kokkos::Profiling::popRegion(); + return matCOO; +} +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterZero.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterZero.hxx new file mode 100644 index 0000000000000000000000000000000000000000..5b0bda028b1466002ac6ce8e0a39df569c10bd94 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTInterZero.hxx @@ -0,0 +1,94 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include "FTUtils.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/Utils/ArborXCallbacks.hxx" + +namespace Muscat::KK::FieldTransfer { + +template +/** + * @brief Compute the target point finite element interpolation if intersects or fill with zero value if outside all elements. + * @param[in] transfer the transfer class holding informations. + * @return sparse coo matrix containing interpolation coefficient aka field transfer operator + */ +SparsedMatCOO FTInterZero(TransferClassKokkos* transfer) { + Kokkos::Profiling::pushRegion("FTInterZero"); + + using MemorySpace = typename ExecSpace::memory_space; + using Box = ArborX::Box; + + ExecSpace execSpace{}; + + auto targetPointsPtr = transfer->targetPoints; + auto targetElemPtr = transfer->targetElement; + auto targetStatusPtr = transfer->targetStatus; + auto elementsTypePtr = transfer->elementTypes; + auto nodesPtr = transfer->nodes; + auto maxConnSizePerElt = transfer->maxConnSizePerElt; + + auto nbtargetPoints = targetPointsPtr.extent(0); + + Kokkos::Profiling::pushRegion("Compute target point FE intersection"); + auto [col2d_d, val2d_d, countEntries] = + ComputeBoxTargetPointIntersection(transfer); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + // Can we avoid this reduction and branching + int nnz = 0; + Kokkos::Profiling::pushRegion("Get nnz"); + Kokkos::parallel_reduce( + "Compute COO operator functor", + Kokkos::RangePolicy(execSpace, 0, nbtargetPoints), + KOKKOS_LAMBDA(int i, int& nbEntries) { + if (targetStatusPtr(i) == TransferStatus::INTER) { + nbEntries += countEntries(i); + } else { // set zeros + nbEntries++; + col2d_d(i, 0) = 0; // this is arbitrary as the value is zero + countEntries(i) = 1; + targetStatusPtr(i) = TransferStatus::ZERO_FILL; + targetElemPtr(i) = 0; // this is arbitrary as the value is zero + val2d_d(i, 0) = 0.0; + } + }, + nnz); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + // Scan (assemble) COO operator ! + Kokkos::Profiling::pushRegion("Scan COO operator"); + auto matCOO = SparsedMatCOO(nnz); + auto rows_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "row_d"), nnz); + auto cols_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "col_d"), nnz); + auto vals_d = Kokkos::View(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "data_d"), nnz); + Kokkos::parallel_scan( + "Scan COO operator functor", + Kokkos::RangePolicy(execSpace, 0, nbtargetPoints), + KOKKOS_LAMBDA(int64_t i, int64_t& partial_sum, bool is_final) { + auto firstIndex = partial_sum; + partial_sum += countEntries(i); + if (is_final) { + for (int64_t j = 0; j < countEntries(i); j++) { + rows_d(firstIndex + j) = i; + cols_d(firstIndex + j) = col2d_d(i, j); + vals_d(firstIndex + j) = val2d_d(i, j); + } + } + }); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + Kokkos::deep_copy(matCOO.rows, rows_d); + Kokkos::deep_copy(matCOO.cols, cols_d); + Kokkos::deep_copy(matCOO.vals, vals_d); + + Kokkos::Profiling::popRegion(); + return matCOO; +} +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTNearest.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTNearest.hxx new file mode 100644 index 0000000000000000000000000000000000000000..6fbd3ab6cf51d534c1bb5eeea9f24bef6c0f6597 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTNearest.hxx @@ -0,0 +1,59 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include "FTUtils.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/Utils/ArborXCallbacks.hxx" + +namespace Muscat::KK::FieldTransfer { + +template +/** + * @brief Compute the nearest point in the mesh for all target points and assigns its value. + * @param[in] transfer the transfer class holding informations. + * @return sparse coo matrix containing interpolation coefficient aka field transfer operator + */ +SparsedMatCOO FTNearest(TransferClassKokkos* transfer) { + Kokkos::Profiling::pushRegion("FTNearest"); + + using MemorySpace = typename ExecSpace::memory_space; + using Point = ArborX::Point; + + ExecSpace execSpace{}; + + auto targetPointsPtr = transfer->targetPoints; + auto targetElemPtr = transfer->targetElement; + auto targetStatusPtr = transfer->targetStatus; + auto nodesPtr = transfer->nodes; + + auto nbtargetPoints = targetPointsPtr.extent(0); + + auto matCOO = SparsedMatCOO(targetPointsPtr.extent(0)); + auto rows_d = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "rows_d"), nbtargetPoints); + auto cols_d = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "cols_d"), nbtargetPoints); + auto vals_d = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "vals_d"), nbtargetPoints); + + Kokkos::Profiling::pushRegion("BVH construction and NN search"); + + ArborX::BoundingVolumeHierarchy bvh(execSpace, ArborX::Experimental::attach_indices(nodesPtr)); + PointCloudNearest pcn{targetPointsPtr}; + bvh.query(execSpace, pcn, NearestExtractIndex{rows_d, cols_d, vals_d, targetStatusPtr, targetElemPtr}); + Kokkos::fence(); + + Kokkos::Profiling::popRegion(); + + Kokkos::deep_copy(matCOO.rows, rows_d); + Kokkos::deep_copy(matCOO.cols, cols_d); + Kokkos::deep_copy(matCOO.vals, vals_d); + + Kokkos::Profiling::popRegion(); + return matCOO; +} + +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTUtils.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTUtils.hxx new file mode 100644 index 0000000000000000000000000000000000000000..7f1188c9ad44e244b23e8636b57a25a13a53fa07 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/FTUtils.hxx @@ -0,0 +1,145 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include + +#include + +#include +#include "MeshTools/TransferBackEnds/Kokkos/NewtonKokkos.hxx" +#include + +namespace Muscat::KK::FieldTransfer { + +#define CaseSwitch(ELEMENTTYPE_STRUCT) \ + case ElementType::ELEMENTTYPE_STRUCT: { \ + if constexpr (spaceDimension >= ELEMENTTYPE_STRUCT##_LagrangeSpaceGeo::dimensionality) \ + return ApplyNewton(xcoor, targetPoint, val, forceEvaluation); \ + break; \ + } + +template +/** + * @brief Redirector function to call the ApplyNewton with the right templates. + * @param[in] type The element type specifying the template to use. + * @param[in] xcoor The coordinates of the element. + * @param[in] targetPoint The target point we want the barycentric coordinates. + * @param[in] clampResult If we want to clamp the result. The return value is always true if not. + * @return true if the target point lands inside of the element and the result is not clamped. + */ +KOKKOS_FUNCTION bool ApplyNewtonOnElement(const ElementType& type, const Eigen::Matrix& xcoor, Eigen::Matrix& targetPoint, Kokkos::View val, const bool forceEvaluation = false) { + switch (type) { + CaseSwitch(Bar_2) + CaseSwitch(Hexahedron_20) + CaseSwitch(Bar_3) + CaseSwitch(Hexahedron_27) + CaseSwitch(Hexahedron_8) + CaseSwitch(Pyramid_13) + CaseSwitch(Pyramid_14) + CaseSwitch(Pyramid_5) + CaseSwitch(Quadrangle_4) + CaseSwitch(Quadrangle_8) + CaseSwitch(Quadrangle_9) + CaseSwitch(Tetrahedron_10) + CaseSwitch(Tetrahedron_4) + CaseSwitch(Triangle_3) + CaseSwitch(Triangle_6) + CaseSwitch(Wedge_15) + CaseSwitch(Wedge_18) + CaseSwitch(Wedge_6) default : char errorMsg[] = "Unexpected element type XXX"; + errorMsg[sizeof(errorMsg) - 4] = '0' + ((int)type / 100); + errorMsg[sizeof(errorMsg) - 3] = '0' + (((int)type / 10) % 10); + errorMsg[sizeof(errorMsg) - 2] = '0' + ((int)type % 10); + Kokkos::abort(errorMsg); + return false; + } + return false; +} + +template +std::tuple, Kokkos::View, Kokkos::View> +ComputeBoxTargetPointIntersection(TransferClassKokkos* transfer) { + using MemorySpace = typename ExecSpace::memory_space; + using Box = ArborX::Box; + + ExecSpace execSpace{}; + + auto targetPointsPtr = transfer->targetPoints; + auto targetElemPtr = transfer->targetElement; + auto targetStatusPtr = transfer->targetStatus; + auto connValPtr = transfer->connectivity_values; + auto connOffPtr = transfer->connectivity_offsets; + auto elementsTypePtr = transfer->elementTypes; + auto nodesPtr = transfer->nodes; + auto maxConnSizePerElt = transfer->maxConnSizePerElt; + + auto nbtargetPoints = targetPointsPtr.extent(0); + + Kokkos::View bboxes(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "Boundary boxes of elements"), transfer->nbElems); + Kokkos::parallel_for( + "Compute element bounding boxes functor", + Kokkos::RangePolicy(execSpace, 0, transfer->nbElems), + KOKKOS_LAMBDA(int i) { + bboxes(i) = Box(); + auto localConn = Kokkos::subview(connValPtr, Kokkos::make_pair(connOffPtr(i), connOffPtr(i + 1))); + int nbConnNodes = connOffPtr(i + 1) - connOffPtr(i); + for (int j = 0; j < nbConnNodes; j++) { + ArborX::Details::expand(bboxes(i), nodesPtr(localConn(j))); + } + }); + ArborX::BoundingVolumeHierarchy bvhBoxes(execSpace, ArborX::Experimental::attach_indices(bboxes)); + auto intersectValues = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "IntersectValues"), 0); + auto intersectOffsets = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "IntersectOffsets"), 0); + auto col2d_d = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "col2d_d"), nbtargetPoints, maxConnSizePerElt); + auto val2d_d = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "val2d_d"), nbtargetPoints, maxConnSizePerElt); + auto countEntries = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "countEntries"), nbtargetPoints); + + PointIntersect pi{targetPointsPtr}; + bvhBoxes.query(execSpace, pi, ExtractIndex{}, intersectValues, intersectOffsets); + Kokkos::parallel_for( + "Copy nodes", Kokkos::RangePolicy(execSpace, 0, nbtargetPoints), KOKKOS_LAMBDA(const int i) { + for (int ibox = intersectOffsets(i); ibox < intersectOffsets(i + 1); ibox++) { + // predicate = target point data + Eigen::Matrix tp; + for (int j = 0; j < spaceDimension; j++) { + tp(0, j) = targetPointsPtr(i, j); + } + // primitive intersected bounding box finite element data + const Muscat::CMuscatIndex curElem = intersectValues(ibox); + const auto elemType = elementsTypePtr(curElem); + Eigen::Matrix Xcoor; + Xcoor.setZero(); + auto localConn = Kokkos::subview(connValPtr, Kokkos::make_pair(connOffPtr(curElem), connOffPtr(curElem + 1))); + int nbConnNodes = connOffPtr(curElem + 1) - connOffPtr(curElem); + for (int j = 0; j < nbConnNodes; ++j) { + auto nodeId = localConn(j); + for (int k = 0; k < spaceDimension; ++k) { + Xcoor(j, k) = nodesPtr(nodeId)[k]; + } + } + auto val2d_sv = Kokkos::subview(val2d_d, i, Kokkos::make_pair(0, nbConnNodes)); + if (Muscat::KK::FieldTransfer::ApplyNewtonOnElement(elemType, Xcoor, tp, val2d_sv /*, false*/)) { + for (int j = 0; j < nbConnNodes; ++j) { + col2d_d(i, j) = localConn(j); + } + countEntries(i) = nbConnNodes; + targetStatusPtr(i) = TransferStatus::INTER; + targetElemPtr(i) = curElem; + return; + } + } + }); + Kokkos::fence(); + return std::make_tuple(col2d_d, val2d_d, countEntries); +} + +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/SkinProjection.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/SkinProjection.hxx new file mode 100644 index 0000000000000000000000000000000000000000..9b244eee6132debcc12c0e3c245eff7a24ab7d68 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Transfers/SkinProjection.hxx @@ -0,0 +1,262 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include + +#include "MeshTools/TransferBackEnds/Kokkos/Utils/ArborXCallbacks.hxx" + +namespace Muscat::KK::FieldTransfer { + +KOKKOS_INLINE_FUNCTION ArborX::Point<2, CMuscatFloat> closest_point_to_edge(const ArborX::Point<2, CMuscatFloat>& p, const ArborX::Point<2, CMuscatFloat>& a, const ArborX::Point<2, CMuscatFloat>& b) { + const auto ab = b - a; + const auto ap = p - a; + CMuscatFloat t = (ap[0] * ab[0] + ap[1] * ab[1]) / (ab[0] * ab[0] + ab[1] * ab[1]); + auto tclamp = Kokkos::max(0.0, Kokkos::min(1.0, t)); + ArborX::Point<2, CMuscatFloat> ret = {a[0] + ab[0] * tclamp, a[1] + ab[1] * tclamp}; + return ret; +}; + +template +void ComputeProjectionOn3DSkin(TransferClassKokkos* transfer, Kokkos::View col2d_d, Kokkos::View val2d_d, Kokkos::View countEntries, bool extrapol = false) { + using MemorySpace = typename ExecSpace::memory_space; + using Triangle = ArborX::Triangle<3, CMuscatFloat>; + using Point = ArborX::Point<3, CMuscatFloat>; + + ExecSpace execSpace{}; + + auto nodesPtr = transfer->nodes; + auto targetPointsPtr = transfer->targetPoints; + auto targetElemPtr = transfer->targetElement; + auto targetStatusPtr = transfer->targetStatus; + auto elementsTypePtr = transfer->elementTypes; + auto connValPtr = transfer->connectivity_values; + auto connOffPtr = transfer->connectivity_offsets; + auto skinFacesPtr = transfer->skinFaces; + auto skinParentsPtr = transfer->skinParents; + + Kokkos::Profiling::pushRegion("Build triangle BoundingVolumeHierarchy"); + auto nbTri = transfer->skinFaces.extent(0); + Kokkos::View skinFacesView(Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "Skin ArborX Triangles"), nbTri); + Kokkos::parallel_for( + "Fill ArborX triangles", + Kokkos::RangePolicy(execSpace, 0, nbTri), + KOKKOS_LAMBDA(int i) { + for (int d = 0; d < 3; d++) { + skinFacesView(i).a[d] = nodesPtr(skinFacesPtr(i, 0))[d]; + skinFacesView(i).b[d] = nodesPtr(skinFacesPtr(i, 1))[d]; + skinFacesView(i).c[d] = nodesPtr(skinFacesPtr(i, 2))[d]; + } + }); + ArborX::BoundingVolumeHierarchy skinBVH(execSpace, ArborX::Experimental::attach_indices(skinFacesView)); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + Kokkos::View values("values", 0); + Kokkos::View offsets("offsets", 0); + + Kokkos::Profiling::pushRegion("Point projection on triangle"); + PointCloudNearest pcn{targetPointsPtr}; + if (extrapol) { + skinBVH.query(execSpace, pcn, PointTriangleProjectionExtrapol{targetElemPtr, targetStatusPtr, skinParentsPtr}, values, offsets); + } else { + skinBVH.query(execSpace, pcn, PointTriangleProjection{targetPointsPtr, targetElemPtr, targetStatusPtr, skinParentsPtr}, values, offsets); + } + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + auto statusOutside = (extrapol) ? TransferStatus::EXTRAP : TransferStatus::CLAMP; + + Kokkos::Profiling::pushRegion("Compute projection and interpolation coefficient"); + Kokkos::parallel_for( + "Compute projection values ", + Kokkos::RangePolicy(execSpace, 0, values.extent(0)), + KOKKOS_LAMBDA(int i) { + auto predicate_index = values(i); + Eigen::Matrix tp; + for (int j = 0; j < 3; j++) { + tp(0, j) = targetPointsPtr(predicate_index, j); + } + // primitive intersected bounding box finite element data + const Muscat::CMuscatIndex curElem = targetElemPtr(predicate_index); + const auto elemType = elementsTypePtr(curElem); + Eigen::Matrix Xcoor; + Xcoor.setZero(); + auto localConn = Kokkos::subview(connValPtr, Kokkos::make_pair(connOffPtr(curElem), connOffPtr(curElem + 1))); + int nbConnNodes = connOffPtr(curElem + 1) - connOffPtr(curElem); + for (int j = 0; j < nbConnNodes; ++j) { + auto nodeId = localConn(j); + for (int k = 0; k < 3; ++k) { + Xcoor(j, k) = nodesPtr(nodeId)[k]; + } + } + auto val2d_sv = Kokkos::subview(val2d_d, predicate_index, Kokkos::make_pair(0, nbConnNodes)); + auto isInside = ApplyNewtonOnElement(elemType, Xcoor, tp, val2d_sv, true); + for (int j = 0; j < nbConnNodes; ++j) { + col2d_d(predicate_index, j) = localConn(j); + } + countEntries(predicate_index) = nbConnNodes; + targetStatusPtr(predicate_index) = statusOutside; + }); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + return; +}; + +template +void ComputeProjectionOn2DSkin(TransferClassKokkos* transfer, Kokkos::View col2d_d, Kokkos::View val2d_d, Kokkos::View countEntries, bool extrapol = false) { + Kokkos::Profiling::pushRegion("Compute query of nearest skin"); + + using MemorySpace = typename ExecSpace::memory_space; + using Point = ArborX::Point<2, CMuscatFloat>; + + ExecSpace execSpace{}; + + auto skinFacesPtr = transfer->skinFaces; + auto skinParentsPtr = transfer->skinParents; + auto nodesPtr = transfer->nodes; + auto targetPointsPtr = transfer->targetPoints; + auto targetElemPtr = transfer->targetElement; + auto targetStatusPtr = transfer->targetStatus; + auto elementsTypePtr = transfer->elementTypes; + auto connValPtr = transfer->connectivity_values; + auto connOffPtr = transfer->connectivity_offsets; + + Kokkos::Profiling::pushRegion("Compute projection and interpolation coefficient"); + Kokkos::parallel_for( + "Retrieve closest bar", + Kokkos::RangePolicy(execSpace, 0, targetPointsPtr.extent(0)), + KOKKOS_LAMBDA(int i) { + if (targetStatusPtr(i) == Muscat::KK::FieldTransfer::TransferStatus::UNDEFINED) { + const Point target = {targetPointsPtr(i, 0), targetPointsPtr(i, 1)}; + CMuscatFloat mindistsqr = Kokkos::Experimental::finite_max::value; + CMuscatIndex closestId = -1; + Point closest; + for (int s = 0; s < skinFacesPtr.extent(0); s++) { + auto p1 = nodesPtr(skinFacesPtr(s, 0)); + auto p2 = nodesPtr(skinFacesPtr(s, 1)); + auto cur = closest_point_to_edge(target, p1, p2); + CMuscatFloat dx = cur[0] - target[0]; + CMuscatFloat dy = cur[1] - target[1]; + CMuscatFloat curdistsqr = dx * dx + dy * dy; + if (curdistsqr < mindistsqr) { + closest = cur; + mindistsqr = curdistsqr; + closestId = s; + } + } + if (!extrapol) { + targetPointsPtr(i, 0) = closest[0]; + targetPointsPtr(i, 1) = closest[1]; + } + targetElemPtr(i) = skinParentsPtr(closestId); + const Muscat::CMuscatIndex curElem = targetElemPtr(i); + const auto elemType = elementsTypePtr(curElem); + Eigen::Matrix Xcoor; + Xcoor.setZero(); + auto localConn = Kokkos::subview(connValPtr, Kokkos::make_pair(connOffPtr(curElem), connOffPtr(curElem + 1))); + int nbConnNodes = connOffPtr(curElem + 1) - connOffPtr(curElem); + for (int j = 0; j < nbConnNodes; ++j) { + auto nodeId = localConn(j); + for (int k = 0; k < 2; ++k) { + Xcoor(j, k) = nodesPtr(nodeId)[k]; + } + } + Eigen::Matrix tp; + for (int j = 0; j < 2; j++) { + tp(0, j) = targetPointsPtr(i, j); + } + auto val2d_sv = Kokkos::subview(val2d_d, i, Kokkos::make_pair(0, nbConnNodes)); + auto isInside = ApplyNewtonOnElement(elemType, Xcoor, tp, val2d_sv, true); + for (int j = 0; j < nbConnNodes; ++j) { + col2d_d(i, j) = localConn(j); + } + countEntries(i) = nbConnNodes; + targetStatusPtr(i) = TransferStatus::CLAMP; + } + }); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + return; +}; + +template +void ComputeProjectionOn1DSkin(TransferClassKokkos* transfer, Kokkos::View col2d_d, Kokkos::View val2d_d, Kokkos::View countEntries, bool extrapol = false) { + Kokkos::Profiling::pushRegion("Compute query of nearest skin"); + + using MemorySpace = typename ExecSpace::memory_space; + using Point = ArborX::Point<1, CMuscatFloat>; + + ExecSpace execSpace{}; + + auto skinFacesPtr = transfer->skinFaces; + auto nodesPtr = transfer->nodes; + auto targetPointsPtr = transfer->targetPoints; + auto targetElemPtr = transfer->targetElement; + auto targetStatusPtr = transfer->targetStatus; + auto skinParentsPtr = transfer->skinParents; + auto elementsTypePtr = transfer->elementTypes; + auto connValPtr = transfer->connectivity_values; + auto connOffPtr = transfer->connectivity_offsets; + + Kokkos::Profiling::pushRegion("Compute projection and interpolation coefficient"); + Kokkos::parallel_for( + "Retrieve closest point", + Kokkos::RangePolicy(execSpace, 0, targetPointsPtr.extent(0)), + KOKKOS_LAMBDA(int i) { + if (targetStatusPtr(i) == Muscat::KK::FieldTransfer::TransferStatus::UNDEFINED) { + const Point target = {targetPointsPtr(i, 0)}; + CMuscatFloat mindist = Kokkos::Experimental::finite_max::value; + CMuscatIndex closestId = -1; + Point closest; + for (int s = 0; s < skinFacesPtr.extent(0); s++) { + auto p = nodesPtr(skinFacesPtr(s, 0)); + auto curdist = Kokkos::abs(p[0] - target[0]); + if (curdist < mindist) { + closest = p; + mindist = curdist; + closestId = s; + } + } + if (!extrapol) { + targetPointsPtr(i, 0) = closest[0]; + } + targetElemPtr(i) = skinParentsPtr(closestId); + const Muscat::CMuscatIndex curElem = targetElemPtr(i); + const auto elemType = elementsTypePtr(curElem); + + Eigen::Matrix Xcoor; + Xcoor.setZero(); + auto localConn = Kokkos::subview(connValPtr, Kokkos::make_pair(connOffPtr(curElem), connOffPtr(curElem + 1))); + int nbConnNodes = connOffPtr(curElem + 1) - connOffPtr(curElem); + for (int j = 0; j < nbConnNodes; ++j) { + auto nodeId = localConn(j); + for (int k = 0; k < 1; ++k) { + Xcoor(j, k) = nodesPtr(nodeId)[k]; + } + } + + Eigen::Matrix tp; + for (int j = 0; j < 1; j++) { + tp(0, j) = targetPointsPtr(i, j); + } + auto val2d_sv = Kokkos::subview(val2d_d, i, Kokkos::make_pair(0, nbConnNodes)); + auto isInside = ApplyNewtonOnElement(elemType, Xcoor, tp, val2d_sv, true); + for (int j = 0; j < nbConnNodes; ++j) { + col2d_d(i, j) = localConn(j); + } + countEntries(i) = nbConnNodes; + targetStatusPtr(i) = TransferStatus::CLAMP; + } + }); + Kokkos::fence(); + Kokkos::Profiling::popRegion(); + + return; +}; + +}; // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/ArborXCallbacks.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/ArborXCallbacks.hxx new file mode 100644 index 0000000000000000000000000000000000000000..4ede9814004718e0f0f05a2f062ec55cd58e95f6 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/ArborXCallbacks.hxx @@ -0,0 +1,167 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// +// ArborX Callbacks definition for Kokkos Field Transfer methods + +#pragma once + +#include + +template +struct PointNearest { + Kokkos::View*, MemorySpace> points; +}; + +template +struct ArborX::AccessTraits> { + using memory_space = MemorySpace; + using size_type = std::size_t; + + KOKKOS_FUNCTION + static size_type size(const PointNearest& pi) { + return pi.points.extent(0); + } + KOKKOS_FUNCTION + static auto get(const PointNearest& pi, + size_type i) { + return attach(nearest(pi.points(i), 1), (int)i); + } +}; + +template +struct PointTriangleProjection { + using MemorySpace = typename ExecSpace::memory_space; + + Kokkos::View targetPointsPtr; + Kokkos::View parentElt; + Kokkos::View status; + Kokkos::View skinParents; + + template + KOKKOS_FUNCTION void operator()(const Predicate& predicate, + const Value& value, + const OutputFunctor& out) const { + const int predicate_index = ArborX::getData(predicate); + + if (status(predicate_index) == Muscat::KK::FieldTransfer::TransferStatus::UNDEFINED) { + ArborX::Point targetPoint; + for (int k = 0; k < spaceDimension; k++) { + targetPoint[k] = targetPointsPtr(predicate_index, k); + } + + auto triangle = value.value; + + using PointTag = ArborX::GeometryTraits::PointTag; + using TriangleTag = ArborX::GeometryTraits::TriangleTag; + using Point = decltype(targetPoint); + using Triangle = decltype(triangle); + + ArborX::Details::Dispatch::distance dist{}; + targetPoint = dist.closest_point(targetPoint, triangle.a, triangle.b, triangle.c); + for (int k = 0; k < spaceDimension; k++) { + targetPointsPtr(predicate_index, k) = targetPoint[k]; + } + parentElt(predicate_index) = skinParents(value.index); + out(predicate_index); + } + } +}; + +template +struct PointTriangleProjectionExtrapol { + using MemorySpace = typename ExecSpace::memory_space; + + Kokkos::View parentElt; + Kokkos::View status; + Kokkos::View skinParents; + + template + KOKKOS_FUNCTION void operator()(const Predicate& predicate, + const Value& value, + const OutputFunctor& out) const { + const int predicate_index = ArborX::getData(predicate); + + if (status(predicate_index) == Muscat::KK::FieldTransfer::TransferStatus::UNDEFINED) { + parentElt(predicate_index) = skinParents(value.index); + out(predicate_index); + } + } +}; + +struct ExtractIndex { + template + KOKKOS_FUNCTION void operator()(Predicate, const Value& value, + const OutputFunctor& out) const { + out(value.index); + } +}; + +template +struct PointCloudNearest { + Kokkos::View points; +}; + +template +struct ArborX::AccessTraits> { + using memory_space = MemorySpace; + using size_type = std::size_t; + + KOKKOS_FUNCTION + static size_type size(const PointCloudNearest& pc) { + return pc.points.extent(0); + } + KOKKOS_FUNCTION + static auto get(const PointCloudNearest& pc, + size_type i) { + ArborX::Point point; + for (int k = 0; k < spaceDimension; k++) { + point[k] = pc.points(i, k); + } + return attach(nearest(point, 1), (int)i); + } +}; + +template +struct NearestExtractIndex { + Kokkos::View row; + Kokkos::View col; + Kokkos::View val; + Kokkos::View status; + Kokkos::View parentElt; + + template + KOKKOS_FUNCTION void operator()(const Predicate& predicate, const Value& value) const { + const int predicate_index = ArborX::getData(predicate); + row(predicate_index) = predicate_index; + col(predicate_index) = value.index; + val(predicate_index) = 1.0; + status(predicate_index) = Muscat::KK::FieldTransfer::TransferStatus::NEAREST; + parentElt(predicate_index) = value.index; + } +}; + +template +struct PointIntersect { + Kokkos::View points; +}; + +template +struct ArborX::AccessTraits> { + using memory_space = MemorySpace; + using size_type = std::size_t; + + KOKKOS_FUNCTION + static size_type size(const PointIntersect& pc) { + return pc.points.extent(0); + } + KOKKOS_FUNCTION + static auto get(const PointIntersect& pc, + size_type i) { + ArborX::Point point; + for (int k = 0; k < spaceDimension; k++) { + point[k] = pc.points(i, k); + } + return attach(intersects(point), (int)i); + } +}; diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/Consts.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/Consts.hxx new file mode 100644 index 0000000000000000000000000000000000000000..4faa1df01c6910273529dc14e7bf476504ef440b --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/Consts.hxx @@ -0,0 +1,22 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +namespace Muscat::KK::FieldTransfer { + +enum class TransferStatus : CMuscatIndex { + UNDEFINED = -1, + OUTSIDE = -2, // temporary + NEAREST = 0, + INTER = 1, + EXTRAP = 2, + CLAMP = 3, + ZERO_FILL = 4, +}; + +static const int maxNodePerElt = 27; + +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/FieldTransferTools.h b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/FieldTransferTools.h new file mode 100644 index 0000000000000000000000000000000000000000..16d567e1ccbdffac0e1b4fa5cbba37aa0fdc3878 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/FieldTransferTools.h @@ -0,0 +1,1168 @@ + +#pragma once + +#include + +#include + +namespace Muscat::KK::FieldTransfer { + +KOKKOS_INLINE_FUNCTION int getDimension(const ElementType& type) { + switch (type) { + case ElementType::Bar_2: + case ElementType::Bar_3: + return 1; + case ElementType::Quadrangle_4: + case ElementType::Quadrangle_8: + case ElementType::Quadrangle_9: + case ElementType::Triangle_3: + case ElementType::Triangle_6: + return 2; + case ElementType::Hexahedron_20: + case ElementType::Hexahedron_27: + case ElementType::Hexahedron_8: + case ElementType::Pyramid_13: + case ElementType::Pyramid_14: + case ElementType::Pyramid_5: + case ElementType::Tetrahedron_10: + case ElementType::Tetrahedron_4: + case ElementType::Wedge_15: + case ElementType::Wedge_18: + case ElementType::Wedge_6: + return 3; + } + return -1; +}; +KOKKOS_INLINE_FUNCTION int getNbFaceDim1(const ElementType& type) { + switch (type) { + case ElementType::Element_NA: + return -1; + case ElementType::Point_1: + return 1; + case ElementType::Bar_2: + return 2; + case ElementType::Bar_3: + return 2; + case ElementType::Quadrangle_4: + return -1; + case ElementType::Quadrangle_8: + return -1; + case ElementType::Quadrangle_9: + return -1; + case ElementType::Triangle_3: + return -1; + case ElementType::Triangle_6: + return -1; + case ElementType::Hexahedron_20: + return -1; + case ElementType::Hexahedron_27: + return -1; + case ElementType::Hexahedron_8: + return -1; + case ElementType::Pyramid_13: + return -1; + case ElementType::Pyramid_14: + return -1; + case ElementType::Pyramid_5: + return -1; + case ElementType::Tetrahedron_10: + return -1; + case ElementType::Tetrahedron_4: + return -1; + case ElementType::Wedge_15: + return -1; + case ElementType::Wedge_18: + return -1; + case ElementType::Wedge_6: + return -1; + default: + return -1; + } +}; +KOKKOS_INLINE_FUNCTION int getNbFaceDim2(const ElementType& type) { + switch (type) { + case ElementType::Element_NA: + return -1; + case ElementType::Point_1: + return -1; + case ElementType::Bar_2: + return 1; + case ElementType::Bar_3: + return 1; + case ElementType::Quadrangle_4: + return 4; + case ElementType::Quadrangle_8: + return 4; + case ElementType::Quadrangle_9: + return 4; + case ElementType::Triangle_3: + return 3; + case ElementType::Triangle_6: + return 3; + case ElementType::Hexahedron_20: + return -1; + case ElementType::Hexahedron_27: + return -1; + case ElementType::Hexahedron_8: + return -1; + case ElementType::Pyramid_13: + return -1; + case ElementType::Pyramid_14: + return -1; + case ElementType::Pyramid_5: + return -1; + case ElementType::Tetrahedron_10: + return -1; + case ElementType::Tetrahedron_4: + return -1; + case ElementType::Wedge_15: + return -1; + case ElementType::Wedge_18: + return -1; + case ElementType::Wedge_6: + return -1; + default: + return -1; + } +}; +KOKKOS_INLINE_FUNCTION int getNbFaceDim3(const ElementType& type) { + switch (type) { + case ElementType::Element_NA: + return -1; + case ElementType::Point_1: + return -1; + case ElementType::Bar_2: + return -1; + case ElementType::Bar_3: + return -1; + case ElementType::Quadrangle_4: + return 1; + case ElementType::Quadrangle_8: + return 1; + case ElementType::Quadrangle_9: + return 1; + case ElementType::Triangle_3: + return 1; + case ElementType::Triangle_6: + return 1; + case ElementType::Hexahedron_20: + return 6; + case ElementType::Hexahedron_27: + return 6; + case ElementType::Hexahedron_8: + return 6; + case ElementType::Pyramid_13: + return 5; + case ElementType::Pyramid_14: + return 5; + case ElementType::Pyramid_5: + return 5; + case ElementType::Tetrahedron_10: + return 4; + case ElementType::Tetrahedron_4: + return 4; + case ElementType::Wedge_15: + return 5; + case ElementType::Wedge_18: + return 5; + case ElementType::Wedge_6: + return 5; + default: + return -1; + } +}; +template +KOKKOS_INLINE_FUNCTION int getNbFace(const ElementType& type) { + switch (spaceDimension) { + case 1: + return getNbFaceDim1(type); + case 2: + return getNbFaceDim2(type); + case 3: + return getNbFaceDim3(type); + default: + assert(false); + return -1; + } +}; +static std::vector elemDim1LinearFacesPaddedOffsets{ + // FOR OFFSETING + 0, + // Element_NA + 0, + // Bar_2 + 2, + // Bar_3 + 4, + // Hexahedron_20 + 2, + // Hexahedron_27 + 2, + // Hexahedron_8 + 2, + // Point_1 + 5, + // Pyramid_13 + 2, + // Pyramid_14 + 2, + // Pyramid_5 + 2, + // Quadrangle_4 + 2, + // Quadrangle_8 + 2, + // Quadrangle_9 + 2, + // Tetrahedron_10 + 2, + // Tetrahedron_4 + 2, + // Triangle_3 + 2, + // Triangle_6 + 2, + // Wedge_15 + 2, + // Wedge_18 + 2, + // Wedge_6 + 2, +}; +static std::vector elemDim1LinearFacesPaddedEntries{ + // Element_NA + + // Bar_2 + 0, 1, + // Bar_3 + 0, 1, + // Hexahedron_20 + + // Hexahedron_27 + + // Hexahedron_8 + + // Point_1 + 0, + // Pyramid_13 + + // Pyramid_14 + + // Pyramid_5 + + // Quadrangle_4 + + // Quadrangle_8 + + // Quadrangle_9 + + // Tetrahedron_10 + + // Tetrahedron_4 + + // Triangle_3 + + // Triangle_6 + + // Wedge_15 + + // Wedge_18 + + // Wedge_6 + +}; +static std::vector elemDim2LinearFacesPaddedOffsets{ + // FOR OFFSETING + 0, + // Element_NA + 0, + // Bar_2 + 2, + // Bar_3 + 4, + // Hexahedron_20 + 4, + // Hexahedron_27 + 4, + // Hexahedron_8 + 4, + // Point_1 + 4, + // Pyramid_13 + 4, + // Pyramid_14 + 4, + // Pyramid_5 + 4, + // Quadrangle_4 + 12, + // Quadrangle_8 + 20, + // Quadrangle_9 + 28, + // Tetrahedron_10 + 28, + // Tetrahedron_4 + 28, + // Triangle_3 + 34, + // Triangle_6 + 40, + // Wedge_15 + 40, + // Wedge_18 + 40, + // Wedge_6 + 40, +}; +static std::vector elemDim2LinearFacesPaddedEntries{ + // Element_NA + + // Bar_2 + /*face 0*/ 0, 1, + // Bar_3 + /*face 0*/ 0, 1, + // Hexahedron_20 + + // Hexahedron_27 + + // Hexahedron_8 + + // Point_1 + + // Pyramid_13 + + // Pyramid_14 + + // Pyramid_5 + + // Quadrangle_4 + /*face 0*/ 0, 1, + /*face 1*/ 1, 2, + /*face 2*/ 2, 3, + /*face 3*/ 3, 0, + // Quadrangle_8 + /*face 0*/ 0, 1, + /*face 1*/ 1, 2, + /*face 2*/ 2, 3, + /*face 3*/ 3, 0, + // Quadrangle_9 + /*face 0*/ 0, 1, + /*face 1*/ 1, 2, + /*face 2*/ 2, 3, + /*face 3*/ 3, 0, + // Tetrahedron_10 + + // Tetrahedron_4 + + // Triangle_3 + /*face 0*/ 0, 1, + /*face 1*/ 1, 2, + /*face 2*/ 2, 0, + // Triangle_6 + /*face 0*/ 0, 1, + /*face 1*/ 1, 2, + /*face 2*/ 2, 0, + // Wedge_15 + + // Wedge_18 + + // Wedge_6 + +}; +static std::vector elemDim3LinearFacesPaddedOffsets{ + // FOR OFFSETING + 0, + // Element_NA + 0, + // Bar_2 + 0, + // Bar_3 + 0, + // Hexahedron_20 + 24, + // Hexahedron_27 + 48, + // Hexahedron_8 + 72, + // Point_1 + 72, + // Pyramid_13 + 92, + // Pyramid_14 + 112, + // Pyramid_5 + 132, + // Quadrangle_4 + 136, + // Quadrangle_8 + 140, + // Quadrangle_9 + 144, + // Tetrahedron_10 + 160, + // Tetrahedron_4 + 176, + // Triangle_3 + 180, + // Triangle_6 + 184, + // Wedge_15 + 208, + // Wedge_18 + 232, + // Wedge_6 + 256, +}; +static std::vector elemDim3LinearFacesPaddedEntries{ + // Element_NA + + // Bar_2 + + // Bar_3 + + // Hexahedron_20 + /*face 0*/ 0, 1, 2, 3, + /*face 1*/ 4, 5, 6, 7, + /*face 2*/ 0, 3, 7, 4, + /*face 3*/ 1, 2, 6, 5, + /*face 4*/ 0, 1, 5, 4, + /*face 5*/ 3, 2, 6, 7, + // Hexahedron_27 + /*face 0*/ 0, 1, 2, 3, + /*face 1*/ 4, 5, 6, 7, + /*face 2*/ 0, 3, 7, 4, + /*face 3*/ 1, 2, 6, 5, + /*face 4*/ 0, 1, 5, 4, + /*face 5*/ 3, 2, 6, 7, + // Hexahedron_8 + /*face 0*/ 0, 1, 2, 3, + /*face 1*/ 4, 5, 6, 7, + /*face 2*/ 0, 3, 7, 4, + /*face 3*/ 1, 2, 6, 5, + /*face 4*/ 0, 1, 5, 4, + /*face 5*/ 3, 2, 6, 7, + // Point_1 + + // Pyramid_13 + /*face 0*/ 0, 1, 2, 3, + /*face 1*/ 0, 1, 4, -1, + /*face 2*/ 0, 4, 3, -1, + /*face 3*/ 2, 4, 3, -1, + /*face 4*/ 1, 2, 4, -1, + // Pyramid_14 + /*face 0*/ 0, 1, 2, 3, + /*face 1*/ 0, 1, 4, -1, + /*face 2*/ 0, 4, 3, -1, + /*face 3*/ 2, 4, 3, -1, + /*face 4*/ 1, 2, 4, -1, + // Pyramid_5 + /*face 0*/ 0, 1, 2, 3, + /*face 1*/ 0, 1, 4, -1, + /*face 2*/ 0, 4, 3, -1, + /*face 3*/ 2, 4, 3, -1, + /*face 4*/ 1, 2, 4, -1, + // Quadrangle_4 + /*face 0*/ 0, 1, 2, 3, + // Quadrangle_8 + /*face 0*/ 0, 1, 2, 3, + // Quadrangle_9 + /*face 0*/ 0, 1, 2, 3, + // Tetrahedron_10 + /*face 0*/ 0, 1, 2, -1, + /*face 1*/ 0, 1, 3, -1, + /*face 2*/ 1, 3, 2, -1, + /*face 3*/ 0, 3, 2, -1, + // Tetrahedron_4 + /*face 0*/ 0, 1, 2, -1, + /*face 1*/ 0, 1, 3, -1, + /*face 2*/ 1, 3, 2, -1, + /*face 3*/ 0, 3, 2, -1, + // Triangle_3 + /*face 0*/ 0, 1, 2, -1, + // Triangle_6 + /*face 0*/ 0, 1, 2, -1, + // Wedge_15 + /*face 0*/ 0, 1, 2, -1, + /*face 1*/ 3, 4, 5, -1, + /*face 2*/ 0, 1, 4, 3, + /*face 3*/ 1, 2, 5, 4, + /*face 4*/ 0, 2, 5, 3, + // Wedge_18 + /*face 0*/ 0, 1, 2, -1, + /*face 1*/ 3, 4, 5, -1, + /*face 2*/ 0, 1, 4, 3, + /*face 3*/ 1, 2, 5, 4, + /*face 4*/ 0, 2, 5, 3, + // Wedge_6 + /*face 0*/ 0, 1, 2, -1, + /*face 1*/ 3, 4, 5, -1, + /*face 2*/ 0, 1, 4, 3, + /*face 3*/ 1, 2, 5, 4, + /*face 4*/ 0, 2, 5, 3}; +std::vector getLinearFacesPaddedOffsets(int dimension) { + switch (dimension) { + case 1: + return elemDim1LinearFacesPaddedOffsets; + case 2: + return elemDim2LinearFacesPaddedOffsets; + case 3: + return elemDim3LinearFacesPaddedOffsets; + default: + assert(false); + return std::vector(); + } +}; +std::vector getLinearFacesPaddedEntries(int dimension) { + switch (dimension) { + case 1: + return elemDim1LinearFacesPaddedEntries; + case 2: + return elemDim2LinearFacesPaddedEntries; + case 3: + return elemDim3LinearFacesPaddedEntries; + default: + assert(false); + return std::vector(); + } +}; +//=== triangularization +static std::vector elemDim1TriFacesOffsets{ + // FOR OFFSETING + 0, + // Element_NA + 0, + // Bar_2 + 2, + // Bar_3 + 5, + // Hexahedron_20 + 5, + // Hexahedron_27 + 5, + // Hexahedron_8 + 5, + // Point_1 + 6, + // Pyramid_13 + 6, + // Pyramid_14 + 6, + // Pyramid_5 + 6, + // Quadrangle_4 + 6, + // Quadrangle_8 + 6, + // Quadrangle_9 + 6, + // Tetrahedron_10 + 6, + // Tetrahedron_4 + 6, + // Triangle_3 + 6, + // Triangle_6 + 6, + // Wedge_15 + 6, + // Wedge_18 + 6, + // Wedge_6 + 6, +}; +static std::vector elemDim1TriFacesEntries{ + // Element_NA + + // Bar_2 + /*face0*/ 0, + /*face1*/ 1, + // Bar_3 + /*face0*/ 0, + /*face1*/ 1, + /*face2*/ 2, + // Hexahedron_20 + + // Hexahedron_27 + + // Hexahedron_8 + + // Point_1 + /*face0*/ 0, + // Pyramid_13 + + // Pyramid_14 + + // Pyramid_5 + + // Quadrangle_4 + + // Quadrangle_8 + + // Quadrangle_9 + + // Tetrahedron_10 + + // Tetrahedron_4 + + // Triangle_3 + + // Triangle_6 + + // Wedge_15 + + // Wedge_18 + + // Wedge_6 + +}; +static std::vector elemDim2TriFacesOffsets{ + // FOR OFFSETING + 0, + // Element_NA + 0, + // Bar_2 + 2, + // Bar_3 + 6, + // Hexahedron_20 + 6, + // Hexahedron_27 + 6, + // Hexahedron_8 + 6, + // Point_1 + 6, + // Pyramid_13 + 6, + // Pyramid_14 + 6, + // Pyramid_5 + 6, + // Quadrangle_4 + 14, + // Quadrangle_8 + 30, + // Quadrangle_9 + 46, + // Tetrahedron_10 + 46, + // Tetrahedron_4 + 46, + // Triangle_3 + 52, + // Triangle_6 + 64, + // Wedge_15 + 64, + // Wedge_18 + 64, + // Wedge_6 + 64, +}; +static std::vector elemDim2TriFacesEntries{ + // Element_NA + + // Bar_2 + /*face0*/ 0, 1, + // Bar_3 + /*face0*/ 0, 1, + /*face1*/ 1, 2, + // Hexahedron_20 + + // Hexahedron_27 + + // Hexahedron_8 + + // Point_1 + + // Pyramid_13 + + // Pyramid_14 + + // Pyramid_5 + + // Quadrangle_4 + /*face0*/ 0, 1, + /*face1*/ 1, 2, + /*face2*/ 2, 3, + /*face3*/ 3, 0, + // Quadrangle_8 + /*face0*/ 0, 4, + /*face1*/ 4, 1, + /*face2*/ 1, 5, + /*face3*/ 5, 2, + /*face4*/ 2, 6, + /*face5*/ 6, 3, + /*face6*/ 3, 7, + /*face7*/ 7, 0, + // Quadrangle_9 + /*face0*/ 0, 4, + /*face1*/ 4, 1, + /*face2*/ 1, 5, + /*face3*/ 5, 2, + /*face4*/ 2, 6, + /*face5*/ 6, 3, + /*face6*/ 3, 7, + /*face7*/ 7, 0, + // Tetrahedron_10 + + // Tetrahedron_4 + + // Triangle_3 + /*face0*/ 0, 1, + /*face1*/ 1, 2, + /*face2*/ 2, 0, + // Triangle_6 + /*face0*/ 0, 3, + /*face1*/ 3, 1, + /*face2*/ 1, 4, + /*face3*/ 4, 2, + /*face4*/ 2, 5, + /*face5*/ 5, 0, + // Wedge_15 + + // Wedge_18 + + // Wedge_6 + +}; +static std::vector elemDim3TriFacesOffsets{ + // FOR OFFSETING + 0, + // Element_NA + 0, + // Bar_2 + 0, + // Bar_3 + 0, + // Hexahedron_20 + 108, + // Hexahedron_27 + 216, + // Hexahedron_8 + 252, + // Point_1 + 252, + // Pyramid_13 + 318, + // Pyramid_14 + 384, + // Pyramid_5 + 402, + // Quadrangle_4 + 408, + // Quadrangle_8 + 420, + // Quadrangle_9 + 438, + // Tetrahedron_10 + 486, + // Tetrahedron_4 + 498, + // Triangle_3 + 501, + // Triangle_6 + 513, + // Wedge_15 + 591, + // Wedge_18 + 609, + // Wedge_6 + 633, +}; +static std::vector elemDim3TriFacesEntries{ + // Element_NA + + // Bar_2 + + // Bar_3 + + // Hexahedron_20 + /*face0*/ 0, 8, 11, 11, 10, 3, 8, 10, 11, 8, 1, 9, 2, 10, 9, 8, 9, 10, + /*face1*/ 4, 12, 15, 15, 14, 7, 12, 14, 15, 12, 5, 13, 6, 14, 13, 12, 13, 14, + /*face2*/ 0, 11, 16, 16, 15, 4, 11, 15, 16, 3, 19, 11, 7, 15, 19, 11, 19, 15, + /*face3*/ 1, 9, 17, 17, 13, 5, 9, 13, 17, 2, 18, 9, 6, 13, 18, 9, 18, 13, + /*face4*/ 0, 8, 16, 16, 12, 4, 8, 12, 16, 1, 17, 8, 17, 13, 12, 8, 17, 12, + /*face5*/ 3, 10, 19, 19, 14, 7, 10, 14, 19, 2, 18, 10, 6, 14, 18, 10, 18, 14, + // Hexahedron_27 + /*face0*/ 0, 8, 11, 11, 10, 3, 8, 10, 11, 8, 1, 9, 2, 10, 9, 8, 9, 10, + /*face1*/ 4, 12, 15, 15, 14, 7, 12, 14, 15, 12, 5, 13, 6, 14, 13, 12, 13, 14, + /*face2*/ 0, 11, 16, 16, 15, 4, 11, 15, 16, 3, 19, 11, 7, 15, 19, 11, 19, 15, + /*face3*/ 1, 9, 17, 17, 13, 5, 9, 13, 17, 2, 18, 9, 6, 13, 18, 9, 18, 13, + /*face4*/ 0, 8, 16, 16, 12, 4, 8, 12, 16, 1, 17, 8, 17, 13, 12, 8, 17, 12, + /*face5*/ 3, 10, 19, 19, 14, 7, 10, 14, 19, 2, 18, 10, 6, 14, 18, 10, 18, 14, + // Hexahedron_8 + /*face0*/ 0, 1, 2, 0, 2, 3, + /*face1*/ 4, 5, 6, 4, 6, 7, + /*face2*/ 0, 3, 4, 3, 7, 4, + /*face3*/ 1, 2, 5, 2, 6, 5, + /*face4*/ 0, 1, 4, 1, 5, 4, + /*face5*/ 3, 2, 7, 2, 6, 7, + // Point_1 + + // Pyramid_13 + /*face0*/ 0, 5, 8, 8, 7, 3, 5, 7, 8, 1, 6, 5, 6, 2, 7, 5, 6, 7, + /*face1*/ 0, 5, 9, 9, 10, 4, 1, 10, 5, 5, 10, 9, + /*face2*/ 0, 9, 8, 3, 8, 12, 4, 12, 9, 8, 9, 12, + /*face3*/ 3, 7, 12, 2, 11, 7, 4, 12, 11, 7, 11, 12, + /*face4*/ 1, 6, 10, 2, 11, 6, 4, 10, 11, 6, 11, 10, + // Pyramid_14 + /*face0*/ 0, 5, 8, 8, 7, 3, 5, 7, 8, 1, 6, 5, 6, 2, 7, 5, 6, 7, + /*face1*/ 0, 5, 9, 9, 10, 4, 1, 10, 5, 5, 10, 9, + /*face2*/ 0, 9, 8, 3, 8, 12, 4, 12, 9, 8, 9, 12, + /*face3*/ 3, 7, 12, 2, 11, 7, 4, 12, 11, 7, 11, 12, + /*face4*/ 1, 6, 10, 2, 11, 6, 4, 10, 11, 6, 11, 10, + // Pyramid_5 + /*face0*/ 0, 1, 2, 0, 2, 3, + /*face1*/ 0, 1, 4, + /*face2*/ 0, 4, 3, + /*face3*/ 2, 4, 3, + /*face4*/ 1, 2, 4, + // Quadrangle_4 + /*face0*/ 0, 1, 2, 0, 2, 3, + // Quadrangle_8 + /*face0*/ 0, 4, 7, 3, 7, 6, 4, 6, 7, 1, 5, 4, 2, 6, 5, 4, 5, 6, + // Quadrangle_9 + /*face0*/ 0, 4, 7, 3, 7, 6, 4, 6, 7, 1, 5, 4, 2, 6, 5, 4, 5, 6, + // Tetrahedron_10 + /*face0*/ 0, 4, 6, 2, 6, 5, 1, 5, 4, 4, 5, 6, + /*face1*/ 0, 4, 7, 3, 7, 8, 1, 8, 4, 4, 8, 7, + /*face2*/ 1, 8, 5, 3, 9, 8, 2, 5, 9, 5, 8, 9, + /*face3*/ 0, 7, 6, 2, 6, 9, 3, 9, 7, 6, 7, 9, + // Tetrahedron_4 + /*face0*/ 0, 1, 2, + /*face1*/ 0, 1, 3, + /*face2*/ 1, 3, 2, + /*face3*/ 0, 3, 2, + // Triangle_3 + /*face0*/ 0, 1, 2, + // Triangle_6 + /*face0*/ 0, 3, 5, 1, 4, 3, 2, 5, 4, 3, 4, 5, + // Wedge_15 + /*face0*/ 0, 6, 8, 1, 7, 6, 2, 8, 7, 6, 7, 8, + /*face1*/ 3, 9, 11, 4, 10, 9, 5, 11, 10, 9, 10, 11, + /*face2*/ 0, 6, 12, 12, 9, 3, 6, 9, 12, 1, 13, 6, 4, 9, 13, 6, 13, 9, + /*face3*/ 1, 7, 13, 4, 13, 10, 7, 10, 13, 2, 14, 7, 5, 10, 14, 7, 14, 10, + /*face4*/ 0, 8, 12, 12, 11, 3, 8, 11, 12, 2, 14, 8, 5, 11, 14, 8, 14, 11, + // Wedge_18 + /*face0*/ 0, 6, 8, 1, 7, 6, 2, 8, 7, 6, 7, 8, + /*face1*/ 3, 9, 11, 4, 10, 9, 5, 11, 10, 9, 10, 11, + /*face2*/ 0, 6, 12, 12, 9, 3, 6, 9, 12, 1, 13, 6, 4, 9, 13, 6, 13, 9, + /*face3*/ 1, 7, 13, 4, 13, 10, 7, 10, 13, 2, 14, 7, 5, 10, 14, 7, 14, 10, + /*face4*/ 0, 8, 12, 12, 11, 3, 8, 11, 12, 2, 14, 8, 5, 11, 14, 8, 14, 11, + // Wedge_6 + /*face0*/ 0, 1, 2, + /*face1*/ 3, 4, 5, + /*face2*/ 0, 1, 4, 0, 4, 3, + /*face3*/ 1, 2, 4, 2, 5, 4, + /*face4*/ 0, 2, 3, 2, 5, 3}; +std::vector getTriFacesOffsets(int dimension) { + switch (dimension) { + case 1: + return elemDim1TriFacesOffsets; + case 2: + return elemDim2TriFacesOffsets; + case 3: + return elemDim3TriFacesOffsets; + default: + assert(false); + return std::vector(); + } +}; +std::vector getTriFacesEntries(int dimension) { + switch (dimension) { + case 1: + return elemDim1TriFacesEntries; + case 2: + return elemDim2TriFacesEntries; + case 3: + return elemDim3TriFacesEntries; + default: + assert(false); + return std::vector(); + } +}; +static std::vector elemDim1TriLocFacesOffsets{ + // FOR OFFSETING + 0, + // Element_NA + 0, + // Bar_2 + 3, + // Bar_3 + 7, + // Hexahedron_20 + 7, + // Hexahedron_27 + 7, + // Hexahedron_8 + 7, + // Point_1 + 9, + // Pyramid_13 + 9, + // Pyramid_14 + 9, + // Pyramid_5 + 9, + // Quadrangle_4 + 9, + // Quadrangle_8 + 9, + // Quadrangle_9 + 9, + // Tetrahedron_10 + 9, + // Tetrahedron_4 + 9, + // Triangle_3 + 9, + // Triangle_6 + 9, + // Wedge_15 + 9, + // Wedge_18 + 9, + // Wedge_6 + 9, +}; +static std::vector elemDim1TriLocFacesEntries{ + // Element_NA + + // Bar_2 + 0, 1, 2, + // Bar_3 + 0, 1, 2, 3, + // Hexahedron_20 + + // Hexahedron_27 + + // Hexahedron_8 + + // Point_1 + 0, 1, + // Pyramid_13 + + // Pyramid_14 + + // Pyramid_5 + + // Quadrangle_4 + + // Quadrangle_8 + + // Quadrangle_9 + + // Tetrahedron_10 + + // Tetrahedron_4 + + // Triangle_3 + + // Triangle_6 + + // Wedge_15 + + // Wedge_18 + + // Wedge_6 + +}; +static std::vector elemDim2TriLocFacesOffsets{ + // FOR OFFSETING + 0, + // Element_NA + 0, + // Bar_2 + 2, + // Bar_3 + 4, + // Hexahedron_20 + 4, + // Hexahedron_27 + 4, + // Hexahedron_8 + 4, + // Point_1 + 4, + // Pyramid_13 + 4, + // Pyramid_14 + 4, + // Pyramid_5 + 4, + // Quadrangle_4 + 9, + // Quadrangle_8 + 14, + // Quadrangle_9 + 19, + // Tetrahedron_10 + 19, + // Tetrahedron_4 + 19, + // Triangle_3 + 23, + // Triangle_6 + 27, + // Wedge_15 + 27, + // Wedge_18 + 27, + // Wedge_6 + 27}; +static std::vector elemDim2TriLocFacesEntries{ + // Element_NA + + // Bar_2 + 0, 2, + // Bar_3 + 0, 4, + // Hexahedron_20 + + // Hexahedron_27 + + // Hexahedron_8 + + // Point_1 + + // Pyramid_13 + + // Pyramid_14 + + // Pyramid_5 + + // Quadrangle_4 + 0, 2, 4, 6, 8, + // Quadrangle_8 + 0, 4, 8, 12, 16, + // Quadrangle_9 + 0, 4, 8, 12, 16, + // Tetrahedron_10 + + // Tetrahedron_4 + + // Triangle_3 + 0, 2, 4, 6, + // Triangle_6 + 0, 4, 8, 12, + // Wedge_15 + + // Wedge_18 + + // Wedge_6 + +}; +static std::vector elemDim3TriLocFacesOffsets{ + // FOR OFFSETING + 0, + // Element_NA + 0, + // Bar_2 + 0, + // Bar_3 + 0, + // Hexahedron_20 + 7, + // Hexahedron_27 + 14, + // Hexahedron_8 + 21, + // Point_1 + 21, + // Pyramid_13 + 27, + // Pyramid_14 + 33, + // Pyramid_5 + 39, + // Quadrangle_4 + 41, + // Quadrangle_8 + 43, + // Quadrangle_9 + 45, + // Tetrahedron_10 + 50, + // Tetrahedron_4 + 55, + // Triangle_3 + 57, + // Triangle_6 + 59, + // Wedge_15 + 65, + // Wedge_18 + 71, + // Wedge_6 + 77}; +static std::vector elemDim3TriLocFacesEntries{ + // Element_NA + + // Bar_2 + + // Bar_3 + + // Hexahedron_20 + 0, 18, 36, 54, 72, 90, 108, + // Hexahedron_27 + 0, 18, 36, 54, 72, 90, 108, + // Hexahedron_8 + 0, 6, 12, 18, 24, 30, 36, + // Point_1 + + // Pyramid_13 + 0, 18, 30, 42, 54, 66, + // Pyramid_14 + 0, 18, 30, 42, 54, 66, + // Pyramid_5 + 0, 6, 9, 12, 15, 18, + // Quadrangle_4 + 0, 6, + // Quadrangle_8 + 0, 18, + // Quadrangle_9 + 0, 18, + // Tetrahedron_10 + 0, 12, 24, 36, 48, + // Tetrahedron_4 + 0, 3, 6, 9, 12, + // Triangle_3 + 0, 3, + // Triangle_6 + 0, 12, + // Wedge_15 + 0, 12, 24, 42, 60, 78, + // Wedge_18 + 0, 12, 24, 42, 60, 78, + // Wedge_6 + 0, 3, 6, 12, 18, 24}; +std::vector getTriLocFacesOffsets(int dimension) { + switch (dimension) { + case 1: + return elemDim1TriLocFacesOffsets; + case 2: + return elemDim2TriLocFacesOffsets; + case 3: + return elemDim3TriLocFacesOffsets; + default: + assert(false); + return std::vector(); + } +}; +std::vector getTriLocFacesEntries(int dimension) { + switch (dimension) { + case 1: + return elemDim1TriLocFacesEntries; + case 2: + return elemDim2TriLocFacesEntries; + case 3: + return elemDim3TriLocFacesEntries; + default: + assert(false); + return std::vector(); + } +}; + +}; // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/Params.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/Params.hxx new file mode 100644 index 0000000000000000000000000000000000000000..95e9ffe7c89b2658d41b68847de7fa1f28b8e709 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/Params.hxx @@ -0,0 +1,92 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include +#include +#include + +namespace Muscat::KK::FieldTransfer { + +struct TransferParams { + int dimension; + std::string fespace; + std::string method; + bool differentialOperator; +}; + +enum class FESpace : int { + LagrangeSpaceP0 = 0, + LagrangeSpaceP1, + LagrangeSpaceP2, + LagrangeSpaceGeo, + ConstantSpaceGlobal, + LagrangeSpaceP2Serendipity, +}; + +FESpace deserializeFESpace(std::string& s) { + if (s == "LagrangeSpaceP0") { + return FESpace::LagrangeSpaceP0; + } + if (s == "LagrangeSpaceP1") { + return FESpace::LagrangeSpaceP1; + } + if (s == "LagrangeSpaceP2") { + return FESpace::LagrangeSpaceP2; + } + if (s == "LagrangeSpaceGeo") { + return FESpace::LagrangeSpaceGeo; + } + if (s == "ConstantSpaceGlobal") { + return FESpace::ConstantSpaceGlobal; + } + if (s == "LagrangeSpaceP2Serendipity") { + return FESpace::LagrangeSpaceP2Serendipity; + } + std::cerr << "Invalid FESpace " << s << std::endl; + assert(false); + return FESpace::LagrangeSpaceP2Serendipity; +} + +enum class TransferMethodFlags : int { + NODE_NUMBERING = 0x100, +}; + +enum class TransferMethods : int { + INVALID = 0, + CLAMP = 0x1, + INTER = 0x102, + NEAREST = 0x103, + ZERO_FILL = 0x4, + EXTRAP = 0x5, +}; + +KOKKOS_INLINE_FUNCTION bool hasFlag(TransferMethods a, TransferMethodFlags b) { + return (static_cast(a) & static_cast(b)) != 0; +} + +TransferMethods deserializeTransferMethod(std::string& s) { + if (s == "Nearest/Nearest") { + return TransferMethods::NEAREST; + } + if (s == "Interp/Clamp") { + return TransferMethods::CLAMP; + } + if (s == "Interp/Nearest") { + return TransferMethods::INTER; + } + if (s == "Interp/ZeroFill") { + return TransferMethods::ZERO_FILL; + } + if (s == "Interp/Extrap") { + return TransferMethods::EXTRAP; + } + std::cerr << "Invalid Transfer Method " << s << std::endl; + assert(false); + return TransferMethods::INVALID; +} + +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/SparsedMatCOO.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/SparsedMatCOO.hxx new file mode 100644 index 0000000000000000000000000000000000000000..05f7131b35fffee46ac93e0fa36a63a3491f8c15 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/SparsedMatCOO.hxx @@ -0,0 +1,59 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include + +namespace Muscat::KK::FieldTransfer { + +/** + * A representation of a sparse matrix in the COO format on host device. + */ +class SparsedMatCOO { + public: + Kokkos::View rows; + Kokkos::View cols; + Kokkos::View vals; + + SparsedMatCOO() = default; + SparsedMatCOO(int nbVal) : cols(Kokkos::view_alloc(HostExecutionSpace{}, Kokkos::WithoutInitializing, "Rows Sparse Mat"), nbVal), + rows(Kokkos::view_alloc(HostExecutionSpace{}, Kokkos::WithoutInitializing, "Cols Sparse Mat"), nbVal), + vals(Kokkos::view_alloc(HostExecutionSpace{}, Kokkos::WithoutInitializing, "Vals Sparse Mat"), nbVal) {} + + /** + * @brief Retrieve a pointer to the row vector on the host side. + * @return The pointer to the row values. + */ + CMuscatIndex* GetRowPtr() { + return rows.data(); + } + + /** + * @brief Retrieve a pointer to the col vector data. + * @return The pointer to the col values. + */ + CMuscatIndex* GetColPtr() { + return cols.data(); + } + + /** + * @brief Retrieve a pointer to the data vector on the host side. + * @return The pointer to the values. + */ + CMuscatFloat* GetDataPtr() { + return vals.data(); + } + + /** + * @brief Retrieve the number of value specified in the matrix + * @return The amount of value in the vectors held by this object. + */ + constexpr CMuscatIndex GetSize() { + return rows.extent(0); + } +}; + +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/Transfer.hxx b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/Transfer.hxx new file mode 100644 index 0000000000000000000000000000000000000000..681c9dabd7a436577729e2fcab48a28070ff3165 --- /dev/null +++ b/cpp_src/MeshTools/TransferBackEnds/Kokkos/Utils/Transfer.hxx @@ -0,0 +1,172 @@ +// +// This file is subject to the terms and conditions defined in +// file 'LICENSE.txt', which is part of this source code package. +// + +#pragma once + +#include +#include + +#include +#include + +#include "MeshContainers/ElementUtilities.h" +#include "MeshTools/TransferBackEnds/Kokkos/Utils/FieldTransferTools.h" +#include "MeshTools/TransferBackEnds/Kokkos/Utils/Params.hxx" +#include "MeshTools/TransferBackEnds/Kokkos/Utils/Consts.hxx" + +namespace Muscat::KK::FieldTransfer { + +template +class TransferClassKokkos { + using MemorySpace = typename ExecSpace::memory_space; + + public: + TransferClassKokkos(TransferParams& params) : method(deserializeTransferMethod(params.method)), space(deserializeFESpace(params.fespace)), differentialOperator(params.differentialOperator) {}; + + // Parameters related + TransferMethods method; + FESpace space; + bool differentialOperator; + + // Input mesh related + CMuscatIndex nbElems; + int maxConnSizePerElt; + int nbLinearSkinFaces; + Kokkos::View*, MemorySpace> nodes; + Kokkos::View connectivity_values; + Kokkos::View connectivity_offsets; + Kokkos::View elementTypes; + + // Target cloud point related + Kokkos::View targetPoints; + + // Compute skin related + Kokkos::View skinFaces; + Kokkos::View skinParents; + + // Optional values used in some methods + int nbTargetOutside; + + // Output values + Kokkos::View targetElement; + Kokkos::View targetStatus; // mapped to [true = element found, false = point/triangle projection] +}; + +template +void SetupTransferClassKokkos(TransferClassKokkos& transfer, Mesh& m, MapMatrixDDD& targetPoints) { + Kokkos::Profiling::pushRegion("Setup transfer class"); + + using MemorySpace = typename ExecSpace::memory_space; + + ExecSpace execSpace{}; + Kokkos::DefaultHostExecutionSpace hostSpace{}; + + //=== Transfer target points coords eigen matrix to ArborXPoint Kokkos view and initialized status + auto ptr = targetPoints.data(); + auto size = targetPoints.rows(); + auto matSpaceDim = targetPoints.cols(); + transfer.targetElement = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "Target to elem"), size); + transfer.targetStatus = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "Target to status"), size); + auto unmanagedHost_tp = Kokkos::View>(ptr, size, matSpaceDim); + auto unmanagedDevice_tp = Kokkos::create_mirror_view_and_copy(execSpace, unmanagedHost_tp); + transfer.targetPoints = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "transfer.targetPoints"), size, matSpaceDim); + Kokkos::parallel_for( + "Copy nodes", Kokkos::RangePolicy(execSpace, 0, size), KOKKOS_LAMBDA(const int i) { + transfer.targetElement(i) = -1; + transfer.targetStatus(i) = TransferStatus::UNDEFINED; + for (int j = 0; j < matSpaceDim; j++) { + transfer.targetPoints(i, j) = unmanagedDevice_tp(i, j); + } + }); + + //=== Transfer nodes coords eigen matrix to ArborXPoint Kokkos view + ExecSpace execSpace2{}; + ptr = m.GetNodesMatrix().data(); + size = m.GetNodesMatrix().rows(); + matSpaceDim = m.GetNodesMatrix().cols(); + auto unmanagedHost = Kokkos::View>(ptr, size, matSpaceDim); + auto unmanagedDevice = Kokkos::create_mirror_view_and_copy(execSpace2, unmanagedHost); + transfer.nodes = Kokkos::View*, ExecSpace>( + Kokkos::view_alloc(execSpace2, Kokkos::WithoutInitializing, "transfer.nodes"), size); + auto minSpaceDim = std::min((int)matSpaceDim, spaceDimension); + Kokkos::parallel_for( + "Copy nodes", Kokkos::RangePolicy(execSpace2, 0, size), KOKKOS_LAMBDA(const int i) { + for (int j = 0; j < minSpaceDim; j++) { + transfer.nodes(i)[j] = unmanagedDevice(i, j); + } + }); + + //=== Set elements type and connectivity + if (transfer.method != TransferMethods::NEAREST) { + CMuscatIndex nbElems = 0; + CMuscatIndex countNodes = 0; + transfer.maxConnSizePerElt = -1; + transfer.nbLinearSkinFaces = 0; + for (const auto& x : m.elements.storage) { + auto dim = elementsInfo[x.first].__dimensionality; + if (dim < spaceDimension - 1) { + continue; + } + nbElems += x.second.GetNumberOfElements(); + countNodes += x.second.GetNumberOfElements() * elementsInfo[x.first].numberOfNodes; + transfer.maxConnSizePerElt = std::max(transfer.maxConnSizePerElt, elementsInfo[x.first].numberOfNodes); + transfer.nbLinearSkinFaces += x.second.GetNumberOfElements() * getNbFace(x.first); + } + + transfer.nbElems = nbElems; + transfer.elementTypes = Kokkos::View("Element types", nbElems); + transfer.connectivity_values = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "transfer.connectivity_values"), countNodes); + transfer.connectivity_offsets = Kokkos::View( + Kokkos::view_alloc(execSpace, Kokkos::WithoutInitializing, "transfer.connectivity_offsets"), nbElems + 1); + Kokkos::Experimental::fill_n(execSpace, Kokkos::Experimental::begin(transfer.connectivity_offsets), 1, 0); + + countNodes = 0; + CMuscatIndex countElems = 0; + execSpace.fence(); + Kokkos::Profiling::popRegion(); + Kokkos::Profiling::pushRegion("Setup transfer class connectivity"); + + for (const auto& x : m.elements.storage) { + auto dim = elementsInfo[x.first].__dimensionality; + if (dim < spaceDimension - 1) { + continue; + } + auto fromElem = countElems; + auto fromNode = countNodes; + + auto nbCurrentElems = x.second.GetNumberOfElements(); + auto nodePerElem = elementsInfo[x.first].numberOfNodes; + + countElems += nbCurrentElems; + countNodes += nbCurrentElems * nodePerElem; + + // Set connectivity for current element type + auto& localConnectivity = x.second.GetConnectivityMatrix(); + auto elemsConn = Kokkos::View((CMuscatIndex*)localConnectivity.data(), localConnectivity.size()); + auto connLocation = Kokkos::subview(transfer.connectivity_values, std::pair(fromNode, countNodes)); + Kokkos::deep_copy(connLocation, elemsConn); + + // Set indices for curent element type + auto connOffsets = Kokkos::subview(transfer.connectivity_offsets, std::pair(fromElem + 1, countElems + 1)); + auto elemTypes = Kokkos::subview(transfer.elementTypes, std::pair(fromElem, countElems)); + ElementType curType = x.first; + Kokkos::parallel_for( + "Fill transfer class", Kokkos::RangePolicy(execSpace, 0, nbCurrentElems), KOKKOS_LAMBDA(int i) { + connOffsets(i) = fromNode + nodePerElem * (i + 1); + elemTypes(i) = curType; + }); + } + } + Kokkos::fence(); + Kokkos::Profiling::popRegion(); +}; + +} // namespace Muscat::KK::FieldTransfer diff --git a/cpp_src/Tests/CMakeLists.txt b/cpp_src/Tests/CMakeLists.txt index 90d0d4c9c7fd0ade9786acf74220e2a851bdeeb5..8f7364f1322c14fa69d2d909b898f3195a3207b4 100644 --- a/cpp_src/Tests/CMakeLists.txt +++ b/cpp_src/Tests/CMakeLists.txt @@ -8,7 +8,7 @@ add_test(NAME TestObjectsAllocation COMMAND TestObjectsAllocation) add_executable(MassMatrix MassMatrix.cxx) target_link_libraries(MassMatrix PUBLIC Eigen3::Eigen ${NativeLib}) -if(MUSCAT_ENABLE_CUDA) +if(Kokkos_ENABLE_CUDA) target_compile_options(MassMatrix PUBLIC) endif() add_test(NAME MassMatrix COMMAND MassMatrix) @@ -18,7 +18,7 @@ add_test(NAME MassMatrix COMMAND MassMatrix) add_executable(TestMemoryAllocation TestMemoryAllocation.cxx) target_link_libraries(TestMemoryAllocation ${NativeLib}) target_link_libraries(TestMemoryAllocation Eigen3::Eigen) -if(MUSCAT_ENABLE_CUDA) +if(Kokkos_ENABLE_CUDA) target_compile_options(TestMemoryAllocation PUBLIC) # needed by nvcc_wrapper to force using host compiler (g++ e.g.) endif() add_test(NAME TestMemoryAllocation COMMAND TestMemoryAllocation) diff --git a/docs/developers/CMake.rst b/docs/developers/CMake.rst index ef50d9dfcf552df79b62be7494d148300af2e326..d4f037f9a568dcb2a4a51da4404f21503f69eb2d 100644 --- a/docs/developers/CMake.rst +++ b/docs/developers/CMake.rst @@ -11,27 +11,29 @@ The build system aims to be subdivided to locally generate files. It is intended to build as much as possible out of source (when possible). A ``cmake/`` folder contains all the **tools/macro/functions** to lighter the ``CMakeLists.txt``. -* The **CMakeList.txt** file present at the root of the project contains the *entry point* for the compilation. It also holds all the available options for the compilation. +* The **CMakeLists.txt** file present at the root superbuild folder of the project contains the *entry point* for the compilation. Prior to configuring and building Muscat's project, if Kokkos not found it builds Kokkos dependencies with the desired backend (OpenMP, CUDA, HIP, *etc.*) and auto-detects the device architecture. +* The **CMakeLists.txt** file present at the root is the real *entry point* of Muscat's project. It holds all the available options for the compilation. * The ``cmake/Requirements.cmake`` is being the first imported and ensure that all the **softwares/dependencies** required for Muscat compilation are present on the sytem. It however comes after the options to allow tuning of what are the requirements accordingly. * There is a ``CMakeLists.txt`` in each source directories (*cpp_src*, *src*) as well as in the *cpp_generators* folder. CMake Compilation ================= -Compilation (using ninja) in "release" can be done using the folowing commands: +By default, the project is configured and built with Kokkos::OpenMP backend and to add CUDA support add the CMake argument *-DKokkos_ENABLE_CUDA=ON*. For more Kokkos CMake keywords, please refer to [#kokkosconfig]_. +Compilation (using ninja) in "release" with default Kokkos::OpenMP backend can be done using the following commands: For more information about option of cmake please refer to the cmake documentation [#cmakedoc]_: .. code-block:: mkdir cmake_build cd cmake_build - cmake -DPython_FIND_STRATEGY=LOCATION -DCMAKE_BUILD_TYPE=Release --fresh .. + cmake -DCMAKE_BUILD_TYPE=Release --fresh ../superbuild/ We recommend to use ``Ninja`` build system if available on your system by replacing the last command by: .. code-block:: - cmake -G Ninja -DPython_FIND_STRATEGY=LOCATION -DCMAKE_BUILD_TYPE=Release --fresh .. + cmake -G Ninja -DCMAKE_BUILD_TYPE=Release --fresh ../superbuild/ To build the project: @@ -52,14 +54,14 @@ To run all the tests .. code-block:: - ctest + ctest --test-dir Muscat-build/ You will find more explanations on tests in :ref:`testing`. Notes for Maintaining the Build System ====================================== -As the build system is hierarchical, it is necessary to explicity specify when a variable will be reused in a *parent scope* by writing: +As the build system is hierarchical, it is necessary to explicitly specify when a variable will be reused in a *parent scope* by writing: .. code-block:: cmake @@ -67,3 +69,4 @@ As the build system is hierarchical, it is necessary to explicity specify when a .. rubric:: Footnotes .. [#cmakedoc] https://cmake.org/documentation/ +.. [#kokkosconfig] https://kokkos.org/kokkos-core-wiki/get-started/configuration-guide.html diff --git a/docs/developers/Kokkos.rst b/docs/developers/Kokkos.rst index 716db2c528e097707b0e4d0f412479170a1e1116..3f7e919713d7c1aaf0f4579a95129d3c2bc4acf0 100644 --- a/docs/developers/Kokkos.rst +++ b/docs/developers/Kokkos.rst @@ -4,8 +4,8 @@ Kokkos ====== -Kokkos integration is done on the cpp side (in *cpp_src* folder). The branching to CPU/GPU is made by cython in KokkosHelper.pyx. -Therfore, adding a new Kokkos file is as simple as: +Kokkos integration is done on the cpp side (in *cpp_src* folder). The branching to CPU/GPU is made by cython in KokkosHelper.pyx. +Therefore, adding a new Kokkos file is as simple as: #. Creating your ``cpp_src/*/Kokkos/xxx.[cxx,h]`` file. #. Creating your ``src/*/Kokkos/xxx.pyx`` file. @@ -47,7 +47,7 @@ Your cxx file(s) should be similar to: namespace Muscat::KK { template int GetUniqueRows(/* args */) { - typedef typename ExecSpace::memory_space MemSpace; + typedef typename ExecSpace::memory_space MemorySpace; //Your code... return 0; } diff --git a/docs/environment_readthedocs.yml b/docs/environment_readthedocs.yml index 45faa7b69ccf2c2c47d42dc94659bd4bfcad60a0..e6207a8f29aa9aff76c634929ba9515a953ed7a6 100644 --- a/docs/environment_readthedocs.yml +++ b/docs/environment_readthedocs.yml @@ -23,12 +23,12 @@ dependencies: - setuptools-scm - networkx - kokkos + - arborx - pyvista - cmake - ninja - mmgsuite - pycgns-core - - compilers - breathe - furo diff --git a/docs/rstFiles/Cheat_sheet.rst b/docs/rstFiles/Cheat_sheet.rst index e20944d1bc7a95241aa763c143f588f7e5797806..85183a3be9227ed9b5573700271873ffac2b5ad9 100644 --- a/docs/rstFiles/Cheat_sheet.rst +++ b/docs/rstFiles/Cheat_sheet.rst @@ -81,8 +81,8 @@ Mesh Import, Export and Creation | to/from | | | | MeshIO format | from Muscat.Bridges.MeshIOBridge import MeshToMeshIO | | | | MeshToMeshIO(meshiodata) | | -| | from Muscat.Bridges.MeshIOBridge import MeshIOTOMesh | | -| | MeshIOTOMesh(muscatMesh) | | +| | from Muscat.Bridges.MeshIOBridge import MeshIOToMesh | | +| | MeshIOToMesh(muscatMesh) | | +--------------------+---------------------------------------------------------------------------------------------------------------+--------------------------------------------------------+ | Convert |.. code:: python | :py:mod:`API` | | to/from | | | diff --git a/docs/rstFiles/FieldTransfer.rst b/docs/rstFiles/FieldTransfer.rst index 3cb960ea00504de43c1a410ea8f14ed606eade60..90adbefe4000ca07691512eb553a885a0323ed72 100644 --- a/docs/rstFiles/FieldTransfer.rst +++ b/docs/rstFiles/FieldTransfer.rst @@ -228,7 +228,7 @@ From :numref:`transfer-problem`, we can see that two following cases can be iden The complexity of the algorithm is linear with respect to :math:`N` (the number of target points). For each target point, we need to find the candidate elements. This is done using a geometry search algorithm. -:math:`k`-d tree [#kdtree]_ and R-tree [#rtree]_ data structures are good choices to reduce the complexity of the search step at the expense of the tree construction. +:math: BVH tree [#bvhtree]_ `k`-d tree [#kdtree]_ and R-tree [#rtree]_ data structures are good choices to reduce the complexity of the search step at the expense of the tree construction. Finally with all the shape functions values the transfer operator :math:`P_{N,n}` is given by @@ -258,11 +258,12 @@ At a glance, a generic algorithm looks like this: - If no element is found, use the closest element and calculate the orthogonal projection to :math:`\mathcal{M}` - Construct the transfer operator using the shape functions values (assembly of line :math:`j` of :math:`P`). -Muscat Implementation -********************* +Muscat Implementations +********************** -In this section, we describe in detail the C++ Muscat implementation. -In Muscat, different user options are available to control the kind of algorithm to apply in the case where the target point :math:`T_j` lies inside or outside the mesh :math:`M`. +In this section, we describe in detail both legacy C++ & Kokkos-ArborX implementations. + +For both implementations, different user options are available to control the kind of algorithm to apply in the case where the target point :math:`T_j` lies inside or outside the mesh :math:`M`. Currently Muscat supports the following options: - "Interp/Nearest" : Interpolation inside, and nearest node if :math:`T_j` lies outside :math:`M` @@ -271,9 +272,7 @@ Currently Muscat supports the following options: - "Interp/Extrap" : Interpolation inside, and extrapolation (use of the shape function evaluated outside) if :math:`T_j` lies outside :math:`M` - "Interp/ZeroFill" : Interpolation inside, and no transfer (empty coefficient in :math:`P`) if :math:`T_j` lies outside :math:`M` -For more information please refer to :py:meth:`Muscat.MeshTools.MeshFieldOperations.GetFieldTransferOpCpp`. - -Some extra notions must be introduced to make the explanation of the algorithm more digest: +For more information please refer to :py:meth:`Muscat.MeshTools.MeshFieldOperations.GetFieldTransferOpCpp` and :py:meth:`Muscat.MeshTools.MeshFieldOperations.GetFieldTransferOpKokkos`. - **Child elements** @@ -301,7 +300,7 @@ Some extra notions must be introduced to make the explanation of the algorithm m For 0D element (point elements), all points have a projection. -In the next sections we present, in detail, the implementation of the transfer field operation in Muscat. +In the next sections we present, in detail, both legacy C++ & Kokkos-ArborX implementations of the transfer field operation in Muscat. Interface (API) --------------- @@ -320,9 +319,11 @@ The inputs of the algorithm are: - **Element Filter**: of type :py:class:`Muscat.MeshContainers.Filters.FilterObjects.ElementFilter`. In many cases the user want to transfer only a partial part of the field, this filter select the elements (part of :math:`M`) to be considered as the source for the transfer. + This option is only covered by the legacy C++ implementation. If set in the Kokkos-ArborX implementation, we fall back to the legacy one (to be implement in Kokoks-ArborX framework). - **options**: of type :py:class:`dict`. Extra options are available to select the different strategies for the potential element search and to activate the computation of the derivation operator. + These options are only covered by the legacy C++ implementation. If set in the Kokkos-ArborX implementation, we fall back to the legacy one (to be implement in Kokoks-ArborX framework). Available options are: @@ -360,11 +361,11 @@ And some less relevant, arguments: - 4: "ZeroFill": No information is computed (the point is outside :math:`M`). - 5: "MultipleExtrapPoint": for some points outside the mesh, the concept of closest element is not well defined. Extrapolation is done using one element. -- The entities. For every target point, the entities vector holds the id of the entity (node or element) used to compute the transfer operator. + - The entities. For every target point, the entities vector holds the id of the entity (node or element) used to compute the transfer operator. If status = 0, then entity is a node id. If status :math:`\in [1,2,3,5]`, the entity is an element id. -Algorithmic Core ----------------- +Legacy C++ Algorithmic Core +--------------------------- The first step of the algorithm is to build all the structures for the fast geometrical search of geometrical primitives (points or edges). We use the boost::kdtree [#kdtree]_ for this job. @@ -426,6 +427,55 @@ For all other cases we have: - If no element containing the target points is found: - Use the shape functions values (for extrapolation) or the clamped shape functions (shape function from the first child element with a projection), to construct the transfer operator, and set the status and the entities outputs accordingly. +Kokkos-ArborX Algorithmic Core +------------------------------ + +The first step of the algorithm is to build all the structures for the fast geometrical search of geometrical primitives (points or finite elements). +We use the ArborX BVH trees [#bvhtree]_ for this job. + +For specific "Interp/Clamp" and "Interp/Extrap" methods the triangulated mesh skin is computed to find/project the target points to the nearest mesh domain skin. + +Now that all the preliminary work is done (bvh-trees and, eventually, triangulated skin-mesh building), we loop over the target points to compute the transfer operator. + +In the case of the user chooses the "Nearest/Nearest" approach, a simplified algorithm is used because we don't need to find any elements. + +The construction of the output (transfer operators, status, entities) is trivial. + +For all other cases we have: + +- for each target point :math:`T_j`: + + - Generate a list of intersecting elements's bounding boxes that potentially contain the :math:`T_j` + + - For each candidate element :math:`\mathbf{e}`: + + - Compute the barycentric :math:`\mathbf{\eta^e}` coordinates for the point :math:`T_j` (solve :eq:`Xeta` for :math:`\mathbf{\eta^e}`). + - If the barycentric coordinates define a point inside an element :math:`e`: + + - Compute the interpolation using the equation :eq:`Ueta` + - Assemble line :math:`j` of the transfer operator + - Set correct values for the status[j] = 1, and entities[j]= e + - Go to the next target point + + - If the barycentric coordinates define a point outside all elements : + + - If "Interp/Nearest" : + + - Find nearest node :math:`n` + - Set correct values for the status[j] = 0, and entities[j]= n + + - If "Interp/Clamp" : + - Find nearest element (distance to skin-mesh) :math:`e` + - Project target point :math:`T_j` on :math:`e` that gives :math:`Tproj_j` + - Assemble line :math:`j` of the transfer operator considering :math:`e` & :math:`Tproj_j` + - Set correct values for the status[j] = 1, and entities[j]= e + + - If "Interp/Extrap" : + - Find nearest element (distance to skin-mesh) :math:`e` + - Assemble line :math:`j` of the transfer operator considering :math:`e` & :math:`T_j` + - Set correct values for the status[j] = 1, and entities[j]= e + + - Go to the next target point Pathological Cases ****************** @@ -436,7 +486,7 @@ Here we explain the motivation of the different strategies for the construction Anisotropic meshes in 2D ------------------------ -In the case of meshes with distorted elements, the basic strategy of finding the potential elements using the simple strategy of the closest node does not work properly. +In the case of meshes with distorted elements, the basic strategy of finding the potential elements using the simple strategy of the closest node does not work properly for the legacy C++ implementation. .. _DistordMeshFig: @@ -449,8 +499,9 @@ In the case of meshes with distorted elements, the basic strategy of finding the Examples of a 2D mesh with distorted elements -:numref:`DistordMeshFig`, shows a case where the strategy of the closest point fails because the closest node is not attached to the element that contains the target point. +:numref:`DistordMeshFig`, shows a case where the strategy of the legacy C++ implementation closest point option fails because the closest node is not attached to the element that contains the target point. +For the Kokkos-ArborX, the method is more robust as it relies on bounding box intersecting and thus, if a point is inside an element it will always intersect a bounding box. However, since the bounding boxes are aligned with the axis, we may have a great number of boxes leading to a relatively (demonstrated) additional cost Examples ******** @@ -468,12 +519,16 @@ Performance ../notebooks/FieldTransferPerformance -Conclusions -*********** +Contributors +************ +- Felipe BORDEU: `fbordeu `_ +- Ramzi MESSAHEL: `messahelramzi `_ +- Florian LAINE: `Frayzen `_ .. rubric:: Footnotes .. [#wikiMortarmethods] https://en.wikipedia.org/wiki/Mortar_methods .. [#kdtree] :math:`k`-d tree Wikipedia. https://en.wikipedia.org/wiki/K-d_tree .. [#rtree] Boost spatial indexes. https://live.boost.org/doc/libs/1_85_0/libs/geometry/doc/html/geometry/spatial_indexes/introduction.html +.. [#bvhtree] ArborX : A Performance Portable Geometric Search Library. https://doi.org/10.1145/3412558 diff --git a/docs/rstFiles/Installation.rst b/docs/rstFiles/Installation.rst index 3511070b1f5877a3684638e80dd8ed847d5fffed..6feeda7fb57a9c62a8e100f8aadb51fdff6684d6 100644 --- a/docs/rstFiles/Installation.rst +++ b/docs/rstFiles/Installation.rst @@ -7,7 +7,7 @@ Installing Muscat Conda ***** -If you use conda, you can install Muscat from the `conda-forge channel `_. +If you use conda, you can install Muscat CPU version from the `conda-forge channel `_. A good practice is to use a separate environment rather than modifying the base environment: @@ -29,6 +29,7 @@ The Conda-Forge packages of Muscat are split in 4 packages : * Muscat: this meta package install Muscat-core and Muscat-extensions to have a full installation in one shot. * Muscat-devenv: is a meta package with the dependencies necessarily for the development, debugging, compilation and documentation generation. +One may consider Muscat-devenv without Kokkos & ArborX packages that will be automatically fetched and configured auto-detecting your GPU hardware if an internet connection is available. PIP *** @@ -36,8 +37,8 @@ PIP Pip installation is not available any more. The main reason is the increasing complexity of the building chain (kokkos and Cuda support). -Manual Installation (from Sources) for Developers -************************************************* +Manual Installation (from Sources) for advanced Users and Developers +********************************************************************* In the case you want the latest developer version, make changes to Muscat or contribute new features, an installation from sources is mandatory. The sources can be downloaded from `https://gitlab.com/drti/muscat `_: @@ -48,7 +49,9 @@ The sources can be downloaded from `https://gitlab.com/drti/muscat `_). +Add the ``MUSCAT_REPOSITORY/build/Muscat-build/src/:MUSCAT_REPOSITORY/src/`` folders to the ``PYTHONPATH`` environment variables (more information on `https://docs.python.org/3/using/cmdline.html\#envvar-PYTHONPATH `_). .. warning:: - **Make sure** ``MUSCAT_REPOSITORY/build/src/`` **is placed before** ``MUSCAT_REPOSITORY/src/`` + **Make sure** ``MUSCAT_REPOSITORY/build/Muscat-build/src/`` **is placed before** ``MUSCAT_REPOSITORY/src/`` The user can also install permanently using (Not recommended): @@ -182,7 +185,9 @@ C++ Dependencies +----------------+------------+-------+---+-----+---+--------------------+----------------------------------------------------+ |libboost-headers| >=1.8 2 |\* | | | |\* | For the compilation of the extension field transfer| +----------------+------------+-------+---+-----+---+--------------------+----------------------------------------------------+ -|kokkos |>=v4.4 |\* |\* | | |\* | https://github.com/kokkos/kokkos | +|kokkos | =v4.7.01 |\* |\* | | |\* | https://github.com/kokkos/kokkos | ++----------------+------------+-------+---+-----+---+--------------------+----------------------------------------------------+ +|arborx | =v2.0.1 |\* |\* | | |\* | https://github.com/arborx/ArborX | +----------------+------------+-------+---+-----+---+--------------------+----------------------------------------------------+ |mmg |>=5.8 |\* |\* | | |\* | https://www.mmgtools.org/ | +----------------+------------+-------+---+-----+---+--------------------+----------------------------------------------------+ diff --git a/recipes/CMake/bld_core.bat b/recipes/CMake/bld_core.bat index 7fe846cd1790433d4083a5a31e47b8e671414408..2e2ad99a348927b52f5b8f55829dcc27127051b6 100644 --- a/recipes/CMake/bld_core.bat +++ b/recipes/CMake/bld_core.bat @@ -4,7 +4,7 @@ mkdir build cd build -cmake .. -G "Ninja" ^ +cmake ../superbuild -G "Ninja" ^ -D CMAKE_BUILD_TYPE=Release ^ -D CMAKE_EXPORT_COMPILE_COMMANDS:BOOL="TRUE" ^ -D Muscat_ENABLE_Mumps:BOOL=ON ^ @@ -18,8 +18,11 @@ cmake .. -G "Ninja" ^ -D CMAKE_INSTALL_DATAROOTDIR="Library/share" ^ -D CMAKE_INSTALL_PREFIX="%PREFIX%" +cmake ^ + --build . ^ + --config Release ^ + -j 1 -%python% setup.py build_clib -if errorlevel 1 exit 1 -%python% -m pip install --no-deps . -vv -if errorlevel 1 exit 1 +cmake ^ +--install ./Muscat-build ^ +--config Release diff --git a/recipes/CMake/build_core.sh b/recipes/CMake/build_core.sh index d127c48a16f752542c04639ac28b4057c712c4d4..467c75a02ae8f95393a45812e848e1b4498ab258 100644 --- a/recipes/CMake/build_core.sh +++ b/recipes/CMake/build_core.sh @@ -14,16 +14,16 @@ cmake ${CMAKE_ARG} \ -D CMAKE_INSTALL_PREFIX=${PREFIX} \ -G Ninja \ -B ${PWD}/cmakeBuild \ --S . +-S ./superbuild cmake \ --build cmakeBuild \ --parallel 16 -ctest \ ---test-dir cmakeBuild \ ---output-on-failure \ --E coverage \ +ctest \ +--test-dir cmakeBuild/Muscat-build \ +--output-on-failure \ +-E coverage \ --parallel 16 -cmake --install cmakeBuild +cmake --install cmakeBuild/Muscat-build diff --git a/recipes/CMake/meta.yaml b/recipes/CMake/meta.yaml index 47d5a073f7598443459ab6e6a24ac02f7a17e644..29f66164b8020271d127755c77e7b1349a7f8e63 100644 --- a/recipes/CMake/meta.yaml +++ b/recipes/CMake/meta.yaml @@ -50,9 +50,8 @@ outputs: - libgomp # [linux] - pip - setuptools - # kokkos 4.6 is only available on linux - - kokkos =4.7.* h* # [linux] - - kokkos =4.4.01 h* # [not linux] + - kokkos =4.7.01 h* + - arborx =2.0.1 # mmg software api changes can break the bridge (for version) - mmgsuite =5.8 - libscotch # [not win] @@ -79,8 +78,8 @@ outputs: - networkx >=3.0 - dill - pywin32 # [win] - - kokkos =4.7.* h* # [linux] - - kokkos =4.4.01 h* # [not linux] + - kokkos =4.7.01 h* + - arborx =2.0.1 - mmgsuite =5.8 test: requires: @@ -174,8 +173,8 @@ outputs: - pywin32 # [win] - dill - cmake - - kokkos =4.7.* h* # [linux] - - kokkos =4.4.01 h* # [not linux] + - kokkos =4.7.01 h* + - arborx =2.0.1 - mmgsuite =5.8 - pycgns-core # [py > 39] - pycgns # [py == 39] diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 484a1029d64e536611a5668702f6fe150c4174fb..1f8e63c91d7b861806283026d76ac83cc3b94064 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -32,6 +32,7 @@ set(CYTHON_MODULES MeshTools MeshTools/RemeshBackEnds MeshTools/TransferBackEnds + MeshTools/TransferBackEnds/Kokkos LinAlg Helpers ) @@ -56,7 +57,7 @@ if(Muscat_ENABLE_Python) unset(pyxFiles) foreach(CYTHON_SRC ${CYTHON_SRCS}) - if(Muscat_ENABLE_CUDA) + if(Kokkos_ENABLE_CUDA) cythonize(${CYTHON_SRC} "--host-only") # needed by nvcc_wrapper to force using host compiler (g++ e.g.) else() cythonize(${CYTHON_SRC} "") @@ -66,6 +67,7 @@ if(Muscat_ENABLE_Python) set(CYTHON_KK_SRCS MeshTools/TransferBackEnds/NativeTransfer.pyx + MeshTools/TransferBackEnds/Kokkos/FieldTransferKokkos.pyx Helpers/Kokkos/KokkosHelper.pyx LinAlg/Kokkos/Utils.pyx FE/Integrators/NativeKokkosIntegration.pyx diff --git a/src/Muscat/FE/Integration.py b/src/Muscat/FE/Integration.py index 4dbc0f6a47f952cc3d62a15dd48c3c2f4ab59c08..be3c92b4f6c38d0cd18edafd4c7ce6fb8c259520 100644 --- a/src/Muscat/FE/Integration.py +++ b/src/Muscat/FE/Integration.py @@ -27,7 +27,7 @@ from Muscat.FE.SymWeakForm import GetField, GetTestField, GetNormal import Muscat.FE.WeakForms.NumericalWeakForm as WeakForm from Muscat.FE.IntegrationRules import LagrangeIsoParamQuadrature, IntegrationRulesFactory, GetIntegrationRuleByName -UseKokkos = True +UseKokkos = False UseCpp = True UseMultiThread = True MultiThreadThreshold = 1000 diff --git a/src/Muscat/MeshTools/MeshFieldOperations.py b/src/Muscat/MeshTools/MeshFieldOperations.py index edc4028b90c037a9c11557ae187a5c46582050e3..83dd0ea95eb00056c8ef637d321dfd1d83430a87 100644 --- a/src/Muscat/MeshTools/MeshFieldOperations.py +++ b/src/Muscat/MeshTools/MeshFieldOperations.py @@ -17,8 +17,10 @@ from Muscat.MeshTools.MeshInspectionTools import ExtractElementsByElementFilter from Muscat.LinAlg.Transform import Transform from Muscat.FE.Fields.FEField import FEField from Muscat.MeshTools.TransferBackEnds.FieldTransferOpPython import GetFieldTransferOpPython +from Muscat.MeshTools.TransferBackEnds.Kokkos.FieldTransferKokkos import GetFieldTransferOpKokkosHost +from Muscat.MeshTools.TransferBackEnds.Kokkos.FieldTransferKokkos import GetFieldTransferOpKokkosDevice from Muscat.Helpers.Logger import Error - +import Muscat.Helpers.Kokkos.KokkosHelper as kh def ApplyRotationMatrixTensorField(fields: Dict, fieldsToTreat: List[List], baseNames: List = ["v1", "v2"], inPlace: bool = False, prefix: str = "new_", inverse: bool = False) -> Dict: """Apply a rotation operation on the fields @@ -128,10 +130,10 @@ def GetFieldTransferOpCpp( The methods available are: - - "Interp/Nearest" - "Nearest/Nearest" - - "Interp/Clamp" + - "Interp/Nearest" - "Interp/Extrap" + - "Interp/Clamp" - "Interp/ZeroFill" Possible values (int) for the returned status are: @@ -153,10 +155,10 @@ def GetFieldTransferOpCpp( A couple for the algorithm used when the point is inside/outside the mesh - - "Interp" -> to use the interpolation of the FEField to extract the values - "Nearest" -> to use the closest node to extract the values - - "Clamp" -> to use the closest point to the surface if the point is outside + - "Interp" -> to use the interpolation of the FEField to extract the values - "Extrap" -> to use shape function of the closest element to compute a extrapolation + - "Clamp" -> to use the closest point to the surface if the point is outside - "ZeroFill" -> fill with zero in the case the point is outside If None is provided then "Interp/Clamp" is used @@ -176,8 +178,8 @@ def GetFieldTransferOpCpp( Returns ------- - Tuple [np.ndarray,np.ndarray] - a tuple with 2 object containing: + Tuple [np.ndarray,np.ndarray,np.ndarray] + a tuple with 3 objects containing: - op, sparse matrix with the operator to make the transfer - status: vector of ints with the status transfer for each target point: - entities: vector of the ids of the entities (node or element) used to compute the transfer operator. @@ -228,6 +230,95 @@ def GetFieldTransferOpCpp( entities = nt.GetEntitiesIds() return op, status, entities +def GetFieldTransferOpKokkos( + inputField: FEField, targetPoints: ArrayLike, method: Union[str, None] = None, verbose: bool = False, elementFilter: Optional[ElementFilter] = None, options: Optional[Dict] = None, targetDim=-1 +) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + """Compute the transfer operator from the inputField to the target points so, Using the Kokkos portable (CPU/GPU) implementation: + valueAtTargetPoints = op.dot(FEField.data) + + The methods available are: + + - "Nearest/Nearest" + - "Interp/Nearest" + - "Interp/Extrap" + - "Interp/Clamp" + - "Interp/ZeroFill" + + Possible values (int) for the returned status are: + + - 0: "Nearest" + - 1: "Interp" + - 2: "Extrap" + - 3: "Clamp" + - 4: "ZeroFill" + + Parameters + ---------- + inputField : FEField + the FEField to be transferred + targetPoints : ArrayLike + Numpy array of the target points. Position to extract the values + method : Union[str,None], optional + + A couple for the algorithm used when the point is inside/outside the mesh + + - "Nearest" -> to use the closest node to extract the values + - "Interp" -> to use the interpolation of the FEField to extract the values + - "Extrap" -> to use shape function of the closest element to compute a extrapolation + - "Clamp" -> to use the closest point to the surface if the point is outside + - "ZeroFill" -> fill with zero in the case the point is outside + + If None is provided then "Interp/Clamp" is used + verbose : bool, optional + Print a progress bar, by default False + elementFilter : Optional[ElementFilter], optional + ElementFilter to extract the information from only a subdomain not used in the current implementation, by default None. + If set, GetFieldTransferOpCpp is called instead. + targetDim : Optional[int], optional + target points dimension (e.g. targetPoints.shape[1]). If source mesh dimension < target points dimension GetFieldTransferOpCpp is called instead. + + options : Optional[Dict], optional + Dictionary used in case GetFieldTransferOpCpp is called. It contains one or more key:values pairs: + - "usePointSearch": bool , to activate the research of potential element by point, default (True) + - "useElementSearch": bool , to activate the research of potential element by a center of the closest element and it neighbors, default (False) + - "useElementSearchFast": bool , to activate the research of potential element by a center of the closest element only (option useElementSearch must be true) default (False) + - "useEdgeSearch": bool , to activate the research of potential element using the closest edge. default (True) + - "DifferentialOperator": int, to activate the computation of the derivative of the field (not the value), default -1. + Possible values are; -1 for value transfer; 0 to compute the d/dx; 1 to compute d/dy; 2 to compute d/dz. If not -1, GetFieldTransferOpCpp is called. + + Returns + ------- + Tuple [np.ndarray,np.ndarray,np.ndarray] + a tuple with 3 objects containing: + - op, sparse matrix with the operator to make the transfer + - status: vector of ints with the status transfer for each target point: + - entities: vector of the ids of the entities (node or element) used to compute the transfer operator. + If status = 0, then entity is a node id. if status :math:`\in [1,2,3,5]` the status is an element id. + + return op, status, entities + + """ + dimension = inputField.mesh.GetElementsDimensionality() + dimension_tp = targetDim + if (targetDim == -1): + dimension_tp = targetPoints.shape[1] + assert(dimension > 0 and dimension < 4) + + if dimension < dimension_tp: + print("Warning!!! dimension_source < dimension_tp => GetFieldTransferOpCpp is used !") + return GetFieldTransferOpCpp(inputField, targetPoints, method, verbose, elementFilter, options) + if elementFilter is not None: + print("Warning!!! elementFilter is defined => GetFieldTransferOpCpp is used !") + return GetFieldTransferOpCpp(inputField, targetPoints, method, verbose, elementFilter, options) + if options is not None: + if options["DifferentialOperator"] is not -1: + print("Warning!!! DifferentialOperator option is asked (is not -1) => GetFieldTransferOpCpp is used !") + return GetFieldTransferOpCpp(inputField, targetPoints, method, verbose, elementFilter, options) + + if kh.shouldUseDevice(): + return GetFieldTransferOpKokkosDevice(inputField, targetPoints, method, dimension) + else: + return GetFieldTransferOpKokkosHost(inputField, targetPoints, method, dimension) def TransportPosToPoints(mesh: Mesh, points: ArrayLike, method: str = "Interp/Clamp", verbose: bool = False) -> np.ndarray: """For each point in points compute the position of the source data by transporting the position field @@ -397,7 +488,7 @@ def CheckIntegrityApplyRotationMatrixTensorField(GUI: bool = False) -> str: return "ok" -def RunTransfer(inputFEField, data, outmesh, methods = ["Interp/Nearest", "Nearest/Nearest", "Interp/Clamp", "Interp/Extrap"]) -> None: +def RunTransfer(inputFEField, data, outmesh, methods = ["Nearest/Nearest", "Interp/Nearest", "Interp/Extrap", "Interp/Clamp"]) -> None: from Muscat.Helpers.IO.TemporaryDirectory import TemporaryDirectory tempdir = TemporaryDirectory.GetTempPath() @@ -421,10 +512,16 @@ def RunTransfer(inputFEField, data, outmesh, methods = ["Interp/Nearest", "Neare res += TFormat.Center("%.4f" % d, fill=" ", width=20) + "|" print(res) + opt = { + "usePointSearch": True, + "useElementSearch": True, + "useEdgeSearch": True + } + PrintRow(["method", "cpp [ms]", "python [ms]", "speedup "]) for method in methods: cppStartTime = time.time() - opCpp, statusCpp, entitiesIdsCpp = GetFieldTransferOpCpp(inputFEField, outmesh.nodes, method=method, verbose=logger.getEffectiveLevel() <= 20) + opCpp, statusCpp, entitiesIdsCpp = GetFieldTransferOpCpp(inputFEField, outmesh.nodes, method=method, verbose=logger.getEffectiveLevel() <= 20, options=opt) cppStopTime = time.time() newdataCpp = opCpp.dot(data) PointFieldsNames.append(method + "Cpp") @@ -463,13 +560,88 @@ def RunTransfer(inputFEField, data, outmesh, methods = ["Interp/Nearest", "Neare print(newdataCpp) print(newdataPython) - WriteMeshToXdmf(tempdir + "GetFieldTransferOp_TargetMesh" + inputFEField.name + ".xdmf", outmesh, PointFields=PointFields, PointFieldsNames=PointFieldsNames, Binary = True, HDF5= False) - print(f"Last file with the data : \n {tempdir}GetFieldTransferOp_TargetMesh{inputFEField.name}.xdmf") + WriteMeshToXdmf(tempdir + "GetFieldTransferOpCpp_TargetMesh" + inputFEField.name + ".xdmf", outmesh, PointFields=PointFields, PointFieldsNames=PointFieldsNames, Binary = True, HDF5= False) + print(f"Last file with the data : \n {tempdir}GetFieldTransferOpCpp_TargetMesh{inputFEField.name}.xdmf") print(f"Cpp and python version of the field transfer gives differents solutions for method : {method}") raise - WriteMeshToXdmf(tempdir + "GetFieldTransferOp_TargetMesh" + inputFEField.name + ".xdmf", outmesh, PointFields=PointFields, PointFieldsNames=PointFieldsNames, Binary = True, HDF5= False) + WriteMeshToXdmf(tempdir + "GetFieldTransferOpCpp_TargetMesh" + inputFEField.name + ".xdmf", outmesh, PointFields=PointFields, PointFieldsNames=PointFieldsNames, Binary = True, HDF5= False) + +def RunTransferKokkos(inputFEField, data, outmesh, methods = ["Nearest/Nearest", "Interp/Nearest", "Interp/Extrap", "Interp/Clamp"]) -> None: + from Muscat.Helpers.IO.TemporaryDirectory import TemporaryDirectory + + tempdir = TemporaryDirectory.GetTempPath() + + from Muscat.IO.XdmfWriter import WriteMeshToXdmf + + WriteMeshToXdmf(tempdir + "GetFieldTransferOp_Original" + inputFEField.name + ".xdmf", inputFEField.mesh, PointFields=[data], PointFieldsNames=["OriginalData"]) + + PointFields = [] + PointFieldsNames = [] + from Muscat.Helpers.Logger import logger + import time + from Muscat.Helpers.TextFormatHelper import TFormat + + def PrintRow(datarow): + res = "|" + for d in datarow: + if type(d) is str: + res += TFormat.Center(str(d), fill=" ", width=20) + "|" + else: + res += TFormat.Center("%.4f" % d, fill=" ", width=20) + "|" + print(res) + + PrintRow(["method", "kokkos [ms]", "python [ms]", "speedup "]) + for method in methods: + KKStartTime = time.time() + opKK, statusKK, entitiesIdsKK = GetFieldTransferOpKokkos(inputFEField, outmesh.nodes, method=method, verbose=logger.getEffectiveLevel() <= 20, targetDim=outmesh.GetElementsDimensionality()) + KKStopTime = time.time() + newdataKK = opKK.dot(data) + PointFieldsNames.append(method + "KK") + PointFields.append(newdataKK) + PointFieldsNames.append(method + "Status" + "KK") + PointFields.append(statusKK) + newPosKK = opKK.dot(inputFEField.mesh.nodes) + PointFieldsNames.append(method + "Pos" + "KK") + PointFields.append(newPosKK) + PointFieldsNames.append(method + "Entities" + "KK") + PointFields.append(entitiesIdsKK) + + pythonStartTime = time.time() + opPython, statusPython, entitiesIdsPython = GetFieldTransferOpPython(inputFEField, outmesh.nodes, method=method, verbose=logger.getEffectiveLevel() <= 20) + pythonStopTime = time.time() + newdataPython = opPython.dot(data) + PointFieldsNames.append(method + "Python") + PointFields.append(newdataPython) + PointFieldsNames.append(method + "Status" + "Python") + PointFields.append(statusPython) + newPosPython = opPython.dot(inputFEField.mesh.nodes) + PointFieldsNames.append(method + "Pos" + "Python") + PointFields.append(newPosPython) + PointFieldsNames.append(method + "Entities" + "Python") + PointFields.append(entitiesIdsPython) + + PrintRow([method, KKStopTime - KKStartTime, pythonStopTime - pythonStartTime, (pythonStopTime - pythonStartTime) / ((KKStopTime - KKStartTime) if KKStopTime - KKStartTime > 0 else 1)]) + PointFields.append(newdataKK-newdataPython) + PointFieldsNames.append(method+"_Diff_KK_python") + try: + if method == "Interp/Extrap" or method == "Interp/Clamp" : + np.testing.assert_allclose(newdataKK, newdataPython, rtol=1e-5, atol=0) + else : + np.testing.assert_allclose(newdataKK, newdataPython) + except: # pragma: no cover + print(method) + print(newdataKK.shape) + print(newdataPython.shape) + print(newdataKK) + print(newdataPython) + + WriteMeshToXdmf(tempdir + "GetFieldTransferOpKK_TargetMesh" + inputFEField.name + ".xdmf", outmesh, PointFields=PointFields, PointFieldsNames=PointFieldsNames, Binary = True, HDF5= False) + print(f"Last file with the data : \n {tempdir}GetFieldTransferOpKK_TargetMesh{inputFEField.name}.xdmf") + print(f"KK and python version of the field transfer gives differents solutions for method : {method}") + raise + WriteMeshToXdmf(tempdir + "GetFieldTransferOpKK_TargetMesh" + inputFEField.name + ".xdmf", outmesh, PointFields=PointFields, PointFieldsNames=PointFieldsNames, Binary = True, HDF5= False) def CheckIntegrity1D(GUI: bool = False) -> str: from Muscat.MeshTools.MeshCreationTools import CreateUniformMeshOfBars @@ -500,7 +672,7 @@ def CheckIntegrity1D(GUI: bool = False) -> str: from Muscat.Helpers.Logger import logger - for method, ax in zip(["Interp/Nearest", "Nearest/Nearest", "Interp/Clamp", "Interp/Extrap", "Interp/ZeroFill"], axis): + for method, ax in zip(["Nearest/Nearest", "Interp/Nearest", "Interp/Extrap", "Interp/Clamp", "Interp/ZeroFill"], axis): op = GetFieldTransferOp(inputFEField, b.nodes, method=method, verbose=logger.getEffectiveLevel() <= 20)[0] result = op.dot(data) @@ -524,7 +696,7 @@ def CheckIntegrity1D(GUI: bool = False) -> str: def CheckIntegrity2D(GUI: bool = False) -> str: from Muscat.MeshTools.MeshCreationTools import CreateSquare - inputmesh = CreateSquare(dimensions=[5, 5], origin=[0, 0], spacing=[1.0 / 4, 1.0 / 4.0]) + inputmesh = CreateSquare(dimensions=[5, 5], origin=[0, 0], spacing=[1.0 / 4.0, 1.0 / 4.0]) from Muscat.FE.FETools import PrepareFEComputation @@ -539,6 +711,7 @@ def CheckIntegrity2D(GUI: bool = False) -> str: y = inputmesh.nodes[:, 1] data = (x - 0.5) ** 2 - y * 0.5 + x * y * 0.25 RunTransfer(inputFEField, data, b) + RunTransferKokkos(inputFEField, data, b, methods = ["Nearest/Nearest", "Interp/Nearest", "Interp/Clamp"]) return "ok" @@ -565,6 +738,7 @@ def CheckIntegrity1DTo2D(GUI: bool = False) -> str: y = inputmesh.nodes[:, 1] data = (x - 0.5) ** 2 - y * 0.5 + x * y * 0.25 RunTransfer(inputFEField, data, b) + RunTransferKokkos(inputFEField, data, b) return "ok" @@ -593,7 +767,8 @@ def CheckIntegrity2DTo3D(GUI: bool = False) -> str: x = inputmesh.nodes[:, 0] y = inputmesh.nodes[:, 1] data = (x - 0.5) ** 2 - y * 0.5 + x * y * 0.25 - RunTransfer(inputFEField, data, b, methods = ["Interp/Nearest", "Nearest/Nearest", "Interp/Clamp"]) + RunTransfer(inputFEField, data, b, methods = ["Nearest/Nearest", "Interp/Nearest", "Interp/Clamp"]) + RunTransferKokkos(inputFEField, data, b, methods = ["Nearest/Nearest", "Interp/Nearest", "Interp/Clamp"]) return "ok" @@ -710,6 +885,7 @@ def CheckIntegrity3DTo3D(GUI: bool = False) -> str: data = (x - 0.5) ** 2 - y * 0.5 + x * y * 0.25 RunTransfer(inputFEField, data, b) + RunTransferKokkos(inputFEField, data, b, methods = ["Nearest/Nearest", "Interp/Nearest", "Interp/Clamp"]) return "ok" diff --git a/src/Muscat/MeshTools/TransferBackEnds/Kokkos/FieldTransferKokkos.pyx b/src/Muscat/MeshTools/TransferBackEnds/Kokkos/FieldTransferKokkos.pyx new file mode 100644 index 0000000000000000000000000000000000000000..e8202ca6784dfd3789db58016a24d0289c1a8901 --- /dev/null +++ b/src/Muscat/MeshTools/TransferBackEnds/Kokkos/FieldTransferKokkos.pyx @@ -0,0 +1,94 @@ +#distutils: language = c++ +#cython: language_level = 3 +# +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# + +from Muscat.CythonTypes cimport CMuscatIndex, CMuscatFloat +import Muscat.Helpers.Kokkos.KokkosHelper as kh +from Muscat.MeshContainers.ElementsDescription import ElementsInfo +from Muscat.CythonTypes cimport MapMatrixDDD, MapMatrixIDD, MapMatrixID1, GetSharedMMatrixID1, GetSharedMMatrixDDD, CMuscatIndex +from libcpp.string cimport string +from typing import Tuple, Optional, Union, List, Dict +import numpy as np + +from Muscat.MeshContainers.Mesh import Mesh +from Muscat.Types import MuscatFloat, MuscatIndex, ArrayLike +from Muscat.FE.Fields.FEField import FEField +from Muscat.MeshTools.MeshModificationTools import CleanLonelyNodes + +cimport Muscat.MeshContainers.NativeMesh as cNUM +from libcpp cimport bool as cbool + + +cdef extern from "MeshTools/TransferBackEnds/Kokkos/Utils/SparsedMatCOO.hxx" namespace "Muscat::KK::FieldTransfer": + cdef cppclass SparsedMatCOO: + CMuscatIndex * GetRowPtr() + CMuscatIndex * GetColPtr() + CMuscatFloat * GetDataPtr() + CMuscatIndex GetSize() + +cdef extern from "Helpers/Kokkos/KokkosHelper.h" namespace "Muscat::KK" : + ctypedef int HostExecutionSpace + ctypedef int DeviceExecutionSpace + +cdef extern from "MeshTools/TransferBackEnds/Kokkos/Utils/Params.hxx" namespace "Muscat::KK::FieldTransfer": + cdef struct TransferParams: + int dimension + string fespace + string method + +cdef extern from "MeshTools/TransferBackEnds/Kokkos/FieldTransferKokkos.hxx" namespace "Muscat::KK::FieldTransfer": + SparsedMatCOO FieldTransferWrapper[ExecSpace](TransferParams &, cNUM.Mesh &, MapMatrixDDD & , MapMatrixID1 &, MapMatrixID1 &) + + +cdef GetOperator(CMuscatIndex size, CMuscatIndex* rowPtr, CMuscatIndex* colPtr, CMuscatFloat* dataPtr, CMuscatIndex nbTargetPoints, CMuscatIndex nbSourcePoints): + from scipy.sparse import coo_matrix + cdef CMuscatFloat[::1] d = dataPtr + cdef CMuscatIndex[::1] r = rowPtr + cdef CMuscatIndex[::1] c = colPtr + + return coo_matrix((d, (r, c)), shape=(nbTargetPoints, nbSourcePoints), copy=True) + +def GetFieldTransferOpKokkosDevice(inputField: FEField, targetPoints: ArrayLike, method : str, dimension : int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + cdef cNUM.CMesh mesh = cNUM.CMesh() + + mesh.SetDataFromPython(inputField.mesh) + nbSourcePoints = inputField.mesh.nodes.shape[0] + nbTargetPoints = targetPoints.shape[0] + + status = np.empty((nbTargetPoints), dtype=MuscatIndex) + elements = np.empty((nbTargetPoints), dtype=MuscatIndex) + + cdef TransferParams params + params.dimension = dimension + params.fespace = inputField.space.name.encode() + params.method = method.encode() + + cdef SparsedMatCOO mat = FieldTransferWrapper[DeviceExecutionSpace](params, mesh.GetCppPointer()[0], GetSharedMMatrixDDD(targetPoints).get()[0], GetSharedMMatrixID1(status).get()[0], GetSharedMMatrixID1(elements).get()[0]) + + coo_matrix = GetOperator(mat.GetSize(), mat.GetRowPtr(), mat.GetColPtr(), mat.GetDataPtr(), nbTargetPoints, nbSourcePoints) + return (coo_matrix, status, elements) + +def GetFieldTransferOpKokkosHost(inputField: FEField, targetPoints: ArrayLike, method : str, dimension : int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + cdef cNUM.CMesh mesh = cNUM.CMesh() + + mesh.SetDataFromPython(inputField.mesh) + nbSourcePoints = inputField.mesh.nodes.shape[0] + nbTargetPoints = targetPoints.shape[0] + + status = np.empty((nbTargetPoints), dtype=MuscatIndex) + elements = np.empty((nbTargetPoints), dtype=MuscatIndex) + + cdef TransferParams params + params.dimension = dimension + params.fespace = inputField.space.name.encode() + params.method = method.encode() + + cdef SparsedMatCOO mat = FieldTransferWrapper[HostExecutionSpace](params, mesh.GetCppPointer()[0], GetSharedMMatrixDDD(targetPoints).get()[0], GetSharedMMatrixID1(status).get()[0], GetSharedMMatrixID1(elements).get()[0]) + coo_matrix = GetOperator(mat.GetSize(), mat.GetRowPtr(), mat.GetColPtr(), mat.GetDataPtr(), nbTargetPoints, nbSourcePoints) + return (coo_matrix, status, elements) + +def CheckIntegrity(GUI: bool = False): + return "ok" diff --git a/src/Muscat/MeshTools/TransferBackEnds/Kokkos/__init__.py b/src/Muscat/MeshTools/TransferBackEnds/Kokkos/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b5dfe673b44f190e9e65d941af9da81a72860b5e --- /dev/null +++ b/src/Muscat/MeshTools/TransferBackEnds/Kokkos/__init__.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- +# +# This file is subject to the terms and conditions defined in +# file 'LICENSE.txt', which is part of this source code package. +# +# + +_test = [ + 'FieldTransferKokkos', + ] diff --git a/superbuild/CMakeLists.txt b/superbuild/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..715807a5b34f6e26d644693b016af6b78483a2d4 --- /dev/null +++ b/superbuild/CMakeLists.txt @@ -0,0 +1,107 @@ +cmake_minimum_required(VERSION 3.28) +cmake_policy(SET CMP0177 OLD) + +project(Superbuild) + +include(ExternalProject) + +# Where dependencies will be installed +set(SB_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/deps-install) + +# ----------------------------------------------- +# 0. Default variables +# ----------------------------------------------- + +get_cmake_property(_all CACHE_VARIABLES) +set(_user_args "") +foreach(var ${_all}) + # Exclude internal CMake variables (optional, but recommended) + if(NOT var MATCHES "^CMAKE_") + list(APPEND _user_args "-D${var}=${${var}}") + endif() +endforeach() + +message(STATUS "User provided the following CMake args = ${_user_args}") + +# ----------------------------------------------- +# 1. Detect whether Kokkos is already available +# ----------------------------------------------- +set(BUILD_KOKKOS OFF) + +# Case 1: User manually specified Kokkos_DIR +if(DEFINED Kokkos_DIR) + message(STATUS "User provided Kokkos_DIR = ${Kokkos_DIR}") + + find_package(Kokkos QUIET CONFIG) + if(NOT Kokkos_FOUND) + message(WARNING "Kokkos_DIR was provided but is invalid → will build Kokkos") + set(BUILD_KOKKOS ON) + else() + message(STATUS "Using system-installed Kokkos at ${Kokkos_DIR}") + endif() + +else() + # Case 2: Try to find an installed copy + find_package(Kokkos CONFIG) + if(Kokkos_FOUND) + message(STATUS "Found installed Kokkos: ${Kokkos_DIR}") + else() + message(STATUS "Kokkos not found → will build it") + set(BUILD_KOKKOS ON) + endif() +endif() + +# ----------------------------------------------- +# 2. Add external project if needed +# ----------------------------------------------- +if(BUILD_KOKKOS) + ExternalProject_Add(kokkos + GIT_REPOSITORY https://github.com/kokkos/kokkos.git + GIT_TAG 4.7.01 + CMAKE_ARGS ${_user_args} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_INSTALL_PREFIX=${SB_INSTALL_PREFIX} + -DKokkos_ENABLE_OPENMP=ON + INSTALL_DIR ${SB_INSTALL_PREFIX} + ) + + # If Kokkos is built, point Kokkos_DIR to its install location + set(KOKKOS_EFFECTIVE_DIR "${SB_INSTALL_PREFIX}/lib/cmake/Kokkos") +else() + # If not built, use the existing one + set(KOKKOS_EFFECTIVE_DIR "${Kokkos_DIR}") +endif() + +message(STATUS "KOKKOS_EFFECTIVE_DIR = ${KOKKOS_EFFECTIVE_DIR}") + +# ----------------------------------------------- +# 3. Build your real project, depending on kokkos (only if built) +# ----------------------------------------------- +set(DEPS "") +if(BUILD_KOKKOS) + list(APPEND DEPS kokkos) +endif() + +set(Muscat_CXX_COMPILER "${CMAKE_CXX_COMPILER}") +if(Muscat_ENABLE_CUDA) + if(NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang|AppleClang") + message(STATUS "C++ compiler is NOT LLVM/Clang: ${CMAKE_CXX_COMPILER_ID}, nvcc_wrapper is used!") + if(BUILD_KOKKOS) + set(Muscat_CXX_COMPILER "{SB_INSTALL_PREFIX}/bin/nvcc_wrapper") + else() + set(Muscat_CXX_COMPILER "nvcc_wrapper") + endif() + endif() +endif() + +ExternalProject_Add(Muscat + SOURCE_DIR ${CMAKE_SOURCE_DIR}/../ + BINARY_DIR ${CMAKE_BINARY_DIR}/Muscat-build + DEPENDS ${DEPS} + CMAKE_ARGS + ${_user_args} + -DCMAKE_CXX_COMPILER=${Muscat_CXX_COMPILER} + -DKokkos_DIR=${KOKKOS_EFFECTIVE_DIR} + -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX} + INSTALL_COMMAND "" # disable install step +)