From 665aae684a72c149386cbd00feb690bf8c145db9 Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 25 Feb 2020 15:50:40 +0100 Subject: [PATCH 01/18] Added IDR(S)Stab(L) to Eigen --- Eigen/IterativeLinearSolvers | 3 +- Eigen/src/IterativeLinearSolvers/IDRStab.h | 1000 ++++++++++++++++++++ test/.directory | 4 + test/CMakeLists.txt | 23 +- test/idrstab.cpp | 48 + 5 files changed, 1066 insertions(+), 12 deletions(-) create mode 100755 Eigen/src/IterativeLinearSolvers/IDRStab.h create mode 100644 test/.directory create mode 100644 test/idrstab.cpp diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers index 957d5750b..5d757e378 100644 --- a/Eigen/IterativeLinearSolvers +++ b/Eigen/IterativeLinearSolvers @@ -13,7 +13,7 @@ #include "src/Core/util/DisableStupidWarnings.h" -/** +/** * \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module * * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a squared matrix, usually very large and sparse. @@ -40,6 +40,7 @@ #include "src/IterativeLinearSolvers/ConjugateGradient.h" #include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" #include "src/IterativeLinearSolvers/BiCGSTAB.h" +#include "src/IterativeLinearSolvers/IDRStab.h" #include "src/IterativeLinearSolvers/IncompleteLUT.h" #include "src/IterativeLinearSolvers/IncompleteCholesky.h" diff --git a/Eigen/src/IterativeLinearSolvers/IDRStab.h b/Eigen/src/IterativeLinearSolvers/IDRStab.h new file mode 100755 index 000000000..bb15747f5 --- /dev/null +++ b/Eigen/src/IterativeLinearSolvers/IDRStab.h @@ -0,0 +1,1000 @@ +/* + C.E.M. Schoutrop + + The IDR(S)Stab(L) method is a combination of IDR(S) and BiCGStab(L) + //TODO: elaborate what this improves over BiCGStab here + + Possible optimizations (PO): + -See //PO: notes in the code + + This implementation of IDRStab is based on + 1. Aihara, K., Abe, K., & Ishiwata, E. (2014). A variant of IDRstab with reliable update strategies for solving sparse linear systems. Journal of Computational and Applied Mathematics, 259, 244-258. doi:10.1016/j.cam.2013.08.028 + 2. Aihara, K., Abe, K., & Ishiwata, E. (2015). Preconditioned IDRStab Algorithms for Solving Nonsymmetric Linear Systems. International Journal of Applied Mathematics, 45(3). + 3. Saad, Y. (2003). Iterative Methods for Sparse Linear Systems: Second Edition. Philadelphia, PA: SIAM. + 4. Sonneveld, P., & Van Gijzen, M. B. (2009). IDR(s): A Family of Simple and Fast Algorithms for Solving Large Nonsymmetric Systems of Linear Equations. SIAM Journal on Scientific Computing, 31(2), 1035-1062. doi:10.1137/070685804 + 5. Sonneveld, P. (2012). On the convergence behavior of IDR (s) and related methods. SIAM Journal on Scientific Computing, 34(5), A2576-A2598. + + Special acknowledgement to Mischa Senders for his work on an initial reference implementation of this algorithm in MATLAB, to Lex Kuijpers for testing the IDRStab implementation and to Adithya Vijaykumar for providing the framework for this solver. + + Right-preconditioning based on Ref. 3 is implemented here. +*/ + +#ifndef idrstab_h +#define idrstab_h + +#include +#include +#include +#include + +//TODO: Remove the debug info, and obsolete options in final version. +#define IDRSTAB_DEBUG_INFO 0 //Print info to console about the problem being solved. +#define SAVE_FAILS 0 //Save matrices that didn't converge +#define IDRSTAB_ACCURATE true //true: Accurate version to pass unit tests, false: time-optimized version that works for probably every reasonable case. +#define IDRSTAB_INF_NORM false //true: Use the faster, slightly more strict and not normally used, infinity norm where possible. False: use the standard 2-norm. + +#if SAVE_FAILS +#include +#include "eigen3/unsupported/Eigen/src/SparseExtra/MarketIO.h" +#endif +#if IDRSTAB_DEBUG_INFO > 0 +#include +#endif +namespace Eigen +{ + + namespace internal + { + + template + bool idrstab(const MatrixType& mat, const Rhs& rhs, Dest& x, + const Preconditioner& precond, Index& iters, + typename Dest::RealScalar& tol_error, Index L, Index S) + { + + /* + Setup and type definitions. + */ + typedef typename Dest::Scalar Scalar; + typedef typename Dest::RealScalar RealScalar; + typedef Matrix VectorType; + typedef Matrix DenseMatrixTypeCol; + typedef Matrix DenseMatrixTypeRow; + #if IDRSTAB_DEBUG_INFO >0 + std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); + std::cout << "Matrix size: " << mat.rows() << std::endl; + std::cout << mat.IsRowMajor << std::endl; + //std::cout << "rhs\n" << rhs << std::endl; + //std::cout << "A\n" << mat << std::endl; + #endif + + const Index N = x.rows(); + + Index k = 0; //Iteration counter + const Index maxIters = iters; + + //PO: sqrNorm() saves 1 sqrt calculation + #if IDRSTAB_INF_NORM + const RealScalar rhs_norm = rhs.template lpNorm(); + #else + const RealScalar rhs_norm = rhs.norm(); + #endif + const RealScalar tol2 = tol_error * rhs_norm; + + if (rhs_norm == 0) + { + /* + If b==0, then the exact solution is x=0. + rhs_norm is needed for other calculations anyways, this exit is a freebie. + */ + /* + exit + */ + #if IDRSTAB_DEBUG_INFO >1 + std::cout << "rhs_norm==0 exit" << std::endl; + #endif + x.setZero(); + tol_error = 0.0; + return true; + } + //Construct decomposition objects beforehand. + #if IDRSTAB_ACCURATE + ColPivHouseholderQR qr_solver; + FullPivLU lu_solver; + #else + HouseholderQR qr_solver; + PartialPivLU lu_solver; + #endif + /* + Solving small S x S systems: + From numerical experiment is follows that in some cases (when the tolerance is really too low) inverting sigma is delicate, + since the resulting alpha will be close to zero. If this is not done accurately enough the algorithm breaks down. + Based on the benchmark data from: https://eigen.tuxfamily.org/dox/group__DenseDecompositionBenchmark.html + it is concluded that for the small (SxS) systems the difference between PartialPivLU and FullPivLU is negligible. + HouseholderQR methods can be used for these systems as well, however it is about 2x as slow. + + Solving least squares problem in the polynomial step: + ColPivHouseholderQR is sufficiently accurate, based on the benchmark data this is about 2x as fast compared to FillPivHouseholderQR. + + Resulting strategy: + -Use the more accurate, but slower FullPivLU version for small systems + -Use ColPivHouseholderQR for the minimal residual step. + */ + + if (S >= N || L >= N) + { + /* + The matrix is very small, or the choice of L and S is very poor + in that case solving directly will be best. + */ + /* + Exit + */ + #if IDRSTAB_DEBUG_INFO >1 + std::cout << "Trivial matrix exit" << std::endl; + #endif + lu_solver.compute(DenseMatrixTypeRow(mat)); + x = lu_solver.solve(rhs); + #if IDRSTAB_INF_NORM + tol_error = (rhs - mat * x).template lpNorm() / rhs_norm; + #else + tol_error = (rhs - mat * x).norm() / rhs_norm; + #endif + return true; + } + + //Define maximum sizes to prevent any reallocation later on. + VectorType u(N * (L + 1)); + VectorType r(N * (L + 1)); + DenseMatrixTypeCol V(N * (L + 1), S); + DenseMatrixTypeCol U(N * (L + 1), S); + DenseMatrixTypeCol rHat(N, L + 1); + + VectorType alpha(S); + VectorType gamma(L); + VectorType update(N); + + /* + Main IDRStab algorithm + */ + //Set up the initial residual + r.head(N) = rhs - mat * precond.solve(x); + #if IDRSTAB_INF_NORM + tol_error = r.head(N).template lpNorm(); + #else + tol_error = r.head(N).norm(); + #endif + + DenseMatrixTypeRow h_FOM(S, S - 1); + h_FOM.setZero(); + + /* + Determine an initial U matrix of size N x S + */ + for (Index q = 0; q < S; ++q) + { + //By default S=4, q!=0 case is more likely, this ordering is better for branch prediction. + //Arnoldi-like process to generate a set of orthogonal vectors spanning {u,A*u,A*A*u,...,A^(S-1)*u}. + //This construction can be combined with the Full Orthogonalization Method (FOM) from Ref.3 to provide a possible early exit with no additional MV. + if (q != 0) + { + /* + Original Gram-Schmidt orthogonalization strategy from Ref. 1: + */ + //u.head(N) -= U.topLeftCorner(N, q) * (U.topLeftCorner(N, q).adjoint() * u.head(N)); + + /* + Modified Gram-Schmidt strategy: + Note that GS and MGS are mathematically equivalent, they are NOT numerically equivalent. + + Eventough h is a scalar, converting the dot product to Scalar is not supported: + http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1610 + */ + VectorType w = mat * precond.solve(u.head(N)); + for (Index i = 0; i < q; ++i) + { + //"Normalization factor" (v is normalized already) + VectorType v = U.block(0, i, N, 1); + + //"How much do v and w have in common?" + h_FOM(i, q - 1) = v.adjoint() * w; + + //"Subtract the part they have in common" + w = w - h_FOM(i, q - 1) * v; + } + u.head(N) = w; + h_FOM(q, q - 1) = u.head(N).norm(); + + if (std::abs(h_FOM(q, q - 1)) != 0.0) + { + /* + This only happens if u is NOT exactly zero. In case it is exactly zero + it would imply that that this u has no component in the direction of the current residual. + + By then setting u to zero it will not contribute any further (as it should). + Whereas attempting to normalize results in division by zero. + + Such cases occur if: + 1. The basis of dimension 1) + { + /* + Check for early FOM exit. + */ + //Scalar beta = tol_error; //r.head(N).norm(): This is expected to be tol_error at this point! + Scalar beta = r.head(N).norm(); //This must be the actual length of the vector, not the infinity norm estimate. + VectorType e1 = VectorType::Zero(S - 1); + e1(0) = beta; + lu_solver.compute(h_FOM.topLeftCorner(S - 1, S - 1)); + VectorType y = lu_solver.solve(e1); + VectorType x2 = x + U.block(0, 0, N, S - 1) * y; + + //Using proposition 6.7 in Saad, one MV can be saved to calculate the residual + #if IDRSTAB_INF_NORM + RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.block(0, S - 1, N, 1)).template lpNorm(); + #else + RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.block(0, S - 1, N, 1)).norm(); + #endif + + #if IDRSTAB_DEBUG_INFO >1 + std::cout << "h_FOM\n" << h_FOM << std::endl; + std::cout << "h_FOM(S-1, S - 2)\n" << h_FOM(S - 1, S - 2) << std::endl; + std::cout << "y(S-1)\n" << y(S - 2) << std::endl; + std::cout << "y\n" << y << std::endl; + #if IDRSTAB_INF_NORM + std::cout << "U.block(0,S-1,N,1).template lpNorm()\n" << U.block(0, S - 1, N, 1).template lpNorm() << std::endl; + RealScalar FOM_actual_res = (rhs - mat * precond.solve(x2)).template lpNorm() / rhs_norm; + #else + std::cout << "U.block(0,S-1,N,1).norm()\n" << U.block(0, S - 1, N, 1).norm() << std::endl; + RealScalar FOM_actual_res = (rhs - mat * precond.solve(x2)).norm() / rhs_norm; + #endif + std::cout << "FOM actual residual\n" << FOM_actual_res << std::endl; + std::cout << "FOM estimated residual from Saad method\n " << FOM_residual / rhs_norm << std::endl; + #endif + if (FOM_residual < tol2) + { + /* + Exit, the FOM algorithm was already accurate enough + */ + iters = k; + x = precond.solve(x2); + tol_error = FOM_residual / rhs_norm; + #if IDRSTAB_DEBUG_INFO >0 + std::cout << "Speculative FOM exit" << std::endl; + std::cout << "Estimated relative residual: " << tol_error << std::endl; + #if IDRSTAB_INF_NORM + std::cout << "True relative residual: " << (mat * x - rhs).template lpNorm() / rhs.template lpNorm() << + std::endl; + #else + std::cout << "True relative residual: " << (mat * x - rhs).norm() / rhs.norm() << std::endl; + #endif + + #endif + return true; + } + } + + #if IDRSTAB_DEBUG_INFO >1 + //Columns of U should be orthonormal + std::cout << "Check orthonormality U\n" << + U.block(0, 0, N, S).adjoint()*U.block(0, 0, N, S) << std::endl; + //h_FOM should not contain any NaNs + std::cout << "h_FOM:\n" << h_FOM << std::endl; + #endif + + /* + Select an initial (N x S) matrix R0. + 1. Generate random R0, orthonormalize the result. + 2. This results in R0, however to save memory and compute we only need the adjoint of R0. This is given by the matrix R_T.\ + Additionally, the matrix (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the anti-distributivity property of the adjoint. + This results in AR_T, which is constant if R_T does not have to be regenerated and can be precomputed. Based on reference 4, + this has zero probability in exact arithmetic. However in practice it does (extremely infrequently) occur, + most notably for small matrices. + */ + //PO: To save on memory consumption identity can be sparse + //PO: can this be done with colPiv/fullPiv version as well? This would save 1 construction of a HouseholderQR object + + //Original IDRStab and Kensuke choose S random vectors: + HouseholderQR qr(DenseMatrixTypeCol::Random(N, S)); + DenseMatrixTypeRow R_T = (qr.householderQ() * DenseMatrixTypeCol::Identity(N, + S)).adjoint(); + DenseMatrixTypeRow AR_T = DenseMatrixTypeRow(R_T * mat); + + #if IDRSTAB_DEBUG_INFO >1 + std::cout << "Check orthonormality R_T\n" << + R_T* R_T.adjoint() << std::endl; + #endif + + //Pre-allocate sigma, this space will be recycled without additional allocations. + DenseMatrixTypeCol sigma(S, S); + + int rt_counter = k; //Iteration at which R_T was reset last + bool reset_while = false; //Should the while loop be reset for some reason? + + #if IDRSTAB_ACCURATE + VectorType Q(S, 1); //Vector containing the row-scaling applied to sigma + VectorType P(S, 1); //Vector containing the column-scaling applied to sigma + DenseMatrixTypeCol QAP(S, S); //Scaled sigma + bool repair_flag = false; + RealScalar residual_0 = tol_error; + //bool apply_r_exit = false; + #endif + + while (k < maxIters) + { + + for (Index j = 1; j <= L; ++j) + { + //Cache some indexing variables that occur frequently and are constant. + const Index Nj = N * j; + const Index Nj_plus_1 = N * (j + 1); + const Index Nj_min_1 = N * (j - 1); + + /* + The IDR Step + */ + //Construction of the sigma-matrix, and the decomposition of sigma. + for (Index i = 0; i < S; ++i) + { + sigma.col(i).noalias() = AR_T * precond.solve(U.block(Nj_min_1, i, N, 1)); + } + /* + Suspected is that sigma could be badly scaled, since causes alpha~=0, but the + update vector is not zero. To improve stability we scale with absolute row and col sums first. + Sigma can become badly scaled (but still well-conditioned) for the ASML matrices, where sigma~=0 happens. + A bad sigma also happens if R_T is not chosen properly, for example if R_T is zeros sigma would be zeros as well. + The effect of this is a left-right preconditioner, instead of solving Ax=b, we solve + Q*A*P*inv(P)*b=Q*b. + */ + #if IDRSTAB_ACCURATE + Q = (sigma.cwiseAbs().rowwise().sum()).cwiseInverse(); //Calculate absolute inverse row sum + QAP = Q.asDiagonal() * sigma; //Scale with inverse absolute row sums + P = (QAP.cwiseAbs().colwise().sum()).cwiseInverse(); //Calculate absolute inverse column sum + QAP = QAP * P.asDiagonal(); //Scale with inverse absolute column sums + lu_solver.compute(QAP); + #else + lu_solver.compute(sigma); + #endif + //Obtain the update coefficients alpha + if (j != 1) + { + //alpha=inverse(sigma)*(AR_T*r_{j-2}) + #if IDRSTAB_ACCURATE + alpha.noalias() = lu_solver.solve(Q.asDiagonal() * AR_T * precond.solve(r.segment(N * (j - 2), N))); + #else + alpha.noalias() = lu_solver.solve(AR_T * precond.solve(r.segment(N * (j - 2), N))); + #endif + } + else + { + //alpha=inverse(sigma)*(R_T*r_0); + #if IDRSTAB_ACCURATE + alpha.noalias() = lu_solver.solve(Q.asDiagonal() * R_T * r.head(N)); + #else + alpha.noalias() = lu_solver.solve(R_T * r.head(N)); + #endif + } + //Unscale the solution + #if IDRSTAB_ACCURATE + alpha = P.asDiagonal() * alpha; + #endif + //TODO: Check if another badly scaled scenario exists + + double old_res = tol_error; + + //Obtain new solution and residual from this update + update.noalias() = U.topRows(N) * alpha; + r.head(N) -= mat * precond.solve(update); + x += update; + + for (Index i = 1; i <= j - 2; ++i) + { + //This only affects the case L>2 + r.segment(N * i, N) -= U.block(N * (i + 1), 0, N, S) * alpha; + } + if (j > 1) + { + //r=[r;A*r_{j-2}] + r.segment(Nj_min_1, N).noalias() = mat * precond.solve(r.segment(N * (j - 2), N)); + } + #if IDRSTAB_DEBUG_INFO >1 + #if IDRSTAB_INF_NORM + std::cout << "r.head(N).template lpNorm()380: " << r.head(N).template lpNorm() << std::endl; + std::cout << "update.norm()" << update.template lpNorm() << std::endl; + std::cout << "update.norm()/old_res" << update.template lpNorm() / old_res << std::endl; + #else + std::cout << "r.head(N).norm()380: " << r.head(N).norm() << std::endl; + std::cout << "update.norm()" << update.norm() << std::endl; + std::cout << "update.norm()/old_res" << update.norm() / old_res << std::endl; + #endif + std::cout << "QAP\n" << QAP << std::endl; + std::cout << "sigma\n" << sigma << std::endl; + std::cout << "alpha380: " << alpha << std::endl; + + //If alpha380 is the zero vector, this can be caused by R0 being no good. + //It turns out that the update can be significant, even if alpha~=0, + //this suggests sigma can just be really badly scaled + //This was also found in the early version by Mischa, but thought it was resolved. + //If R_T=identity, then sigma is all zeros. + //A "bad" sigma can be obtained by choosing: + //R_T = DenseMatrixTypeRow::Identity(S, N); + //R_T = R_T+1e-12*DenseMatrixTypeRow::Random(S, N); + #endif + #if IDRSTAB_DEBUG_INFO >2 + VectorType x_unscaled; + VectorType x_scaled; + VectorType b = VectorType::Random(S, 1); + x_unscaled = sigma.fullPivLu().solve(b); + x_scaled = P.asDiagonal() * QAP.fullPivLu().solve(Q.asDiagonal() * b); + std::cout << "x_unscaled\n" << x_unscaled << std::endl; + std::cout << "x_scaled\n" << x_scaled << std::endl; + #endif + #if IDRSTAB_INF_NORM + tol_error = r.head(N).template lpNorm(); + #else + tol_error = r.head(N).norm(); + #endif + if (tol_error < tol2) + { + //If at this point the algorithm has converged, exit. + reset_while = true; + break; + } + #if IDRSTAB_ACCURATE + if (repair_flag==false && tol_error > 10 * residual_0) + { + //Sonneveld's repair flag suggestion from [5] + //This massively reduces problems with false residual estimates (if they'd occur) + repair_flag = true; + #if IDRSTAB_DEBUG_INFO > 0 + std::cout << "repair flag set true, iter: " << k << std::endl; + #endif + } + if (repair_flag && 1000 * tol_error < residual_0) + { + r.head(N) = rhs - mat * precond.solve(x); + repair_flag = false; + #if IDRSTAB_DEBUG_INFO > 0 + std::cout << "repair flag reset iter: " << k << std::endl; + #endif + } + #endif + bool reset_R_T = false; + #if IDRSTAB_INF_NORM + if (alpha.template lpNorm() * rhs_norm < S * NumTraits::epsilon() * old_res) + #else + if (alpha.norm() * rhs_norm < S * NumTraits::epsilon() * old_res) + #endif + { + //This would indicate the update computed by alpha did nothing much to decrease the residual + //apparantly we've also not converged either. + //TODO: Check if there is some better criterion, the current one is a bit handwavy. + reset_R_T = true; + } + + if (reset_R_T) + { + if (k - rt_counter > 0) + { + /* + Only regenerate if it didn't already happen this iteration. + */ + #if IDRSTAB_DEBUG_INFO >1 + std::cout << "Generating new R_T" << std::endl; + #endif + //Choose new R0 and try again + qr.compute(DenseMatrixTypeCol::Random(N, S)); + R_T = (qr.householderQ() * DenseMatrixTypeCol::Identity(N, S)).transpose(); //.adjoint() vs .transpose() makes no difference, R_T is random anyways. + /* + Additionally, the matrix (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the anti-distributivity property of the adjoint. + This results in AR_T, which can be precomputed. + */ + AR_T = DenseMatrixTypeRow(R_T * mat); + j = 0; + rt_counter = k; + continue; + //reset_while = true; + //break; + } + } + bool break_normalization=false; + for (Index q = 1; q <= S; ++q) + { + if (q != 1) + { + //u=[u_1;u_2;...;u_j] + u.head(Nj) = u.segment(N, Nj); + } + else + { + //u = r; + u.head(Nj_plus_1) = r.topRows(Nj_plus_1); + } + //Obtain the update coefficients beta implicitly + //beta=lu_sigma.solve(AR_T * u.block(Nj_min_1, 0, N, 1) + #if IDRSTAB_ACCURATE + u.head(Nj) -= U.topRows(Nj) * P.asDiagonal() * lu_solver.solve(Q.asDiagonal() * AR_T * precond.solve(u.segment(Nj_min_1, N))); + #else + u.head(Nj) -= U.topRows(Nj) * lu_solver.solve(AR_T * precond.solve(u.segment(Nj_min_1, N))); + #endif + //u=[u;Au_{j-1}] + u.segment(Nj, N).noalias() = mat * precond.solve(u.segment(Nj_min_1, N)); + + //Orthonormalize u_j to the columns of V_j(:,1:q-1) + if (q > 1) + { + /* + Original Gram-Schmidt-like procedure to make u orthogonal to the columns of V from Ref. 1. + + The vector mu from Ref. 1 is obtained implicitly: + mu=V.block(Nj, 0, N, q - 1).adjoint() * u.block(Nj, 0, N, 1). + */ + #if IDRSTAB_ACCURATE==false + u.head(Nj_plus_1) -= V.topLeftCorner(Nj_plus_1, q - 1) * (V.block(Nj, 0, N, q - 1).adjoint() * u.segment(Nj, N)); + #else + /* + The same, but using MGS instead of GS + */ + DenseMatrixTypeCol h(1, 1); //Eventhough h should be Scalar, casting from a 1x1 matrix to Scalar is not supported. + for (Index i = 0; i <= q - 2; ++i) + { + //"Normalization factor" + h = V.block(Nj, i, N, 1).adjoint() * V.block(Nj, i, N, 1); + + //"How much do u and V have in common?" + h = V.block(Nj, i, N, 1).adjoint() * u.segment(Nj, N) / h(0, 0); + + //"Subtract the part they have in common" + u.head(Nj_plus_1) -= h(0, 0) * V.block(0, i, Nj_plus_1, 1); + //std::cout << "h\n" << h << std::endl; + } + #endif + } + #if IDRSTAB_ACCURATE + //Normalize u and assign to a column of V + Scalar normalization_constant = u.block(Nj, 0, N, 1).norm(); + + if (normalization_constant != 0.0) + { + /* + If u is exactly zero, this will lead to a NaN. Small, non-zero u is fine. In the case of NaN the algorithm breaks down, + eventhough it could have continued, since u zero implies that there is no further update in a given direction. + */ + u.head(Nj_plus_1) /= normalization_constant; + } + else + { + // iters = k; + // x = precond.solve(x); + // tol_error = tol_error / rhs_norm; + + // #if IDRSTAB_DEBUG_INFO >0 + // std::cout << "normalization_constant==0 exit" << std::endl; + // std::cout << "Estimated relative residual: " << tol_error << std::endl; + // #if IDRSTAB_INF_NORM + // std::cout << "True relative residual: " << (mat * x - rhs).template lpNorm() / rhs.template lpNorm() << + // std::endl; + // #else + // std::cout << "True relative residual: " << (mat * x - rhs).norm() / rhs.norm() << std::endl; + // #endif + u.head(Nj_plus_1).setZero(); + //u.head(Nj_plus_1).setRandom(); //TODO: bij gebrek aan beter >_> + //u.head(Nj_plus_1)=u.head(Nj_plus_1)/u.head(Nj_plus_1).norm(); + #if IDRSTAB_DEBUG_INFO >0 + std::cout << "normalization_constant==0" << std::endl; + std::cout << "tol_error/rhs_norm: " <::epsilon()) + //if(tol_error < tol2) + { + //Just quit, we've converged + iters = k; + x = precond.solve(x); + tol_error = tol_error / rhs_norm; + tol_error = 0.0; + #if IDRSTAB_DEBUG_INFO >0 + std::cout << "normalization_constant==0 EXIT" << std::endl; + #endif + return true; + } + //#endif + //TODO: This happens if A is singular. We may need to return the best solution and quit. + //Currently this can happen many many times in a row (although never for the unit tests). + //apply_r_exit = true; + //break; + break_normalization=true; + break; + //return true; //TODO + //return false; + } + + V.block(0, q - 1, Nj_plus_1, 1).noalias() = u.head(Nj_plus_1); + // if(break_normalization) + // { + // break; + // } + #else + //Since the segment u.head(Nj_plus_1) is not needed next q-iteration this may be combined into one (Only works for GS method, not MGS): + V.block(0, q - 1, Nj_plus_1, 1).noalias() = u.head(Nj_plus_1) / u.segment(Nj, N).norm(); + #endif + + #if IDRSTAB_DEBUG_INFO >1 + { + std::cout << "New u should be orthonormal to the columns of V" << std::endl; + DenseMatrixTypeCol u_V_orthonormality = V.block(Nj, 0, N, q).adjoint() * u.block(Nj, 0, N, 1); + std::cout << u_V_orthonormality << std::endl; //OK + if (u_V_orthonormality(0, 0) - u_V_orthonormality(0, 0) != 0.0) + { + + std::cout << "Internal state:" << std::endl; + std::cout << "V:\n" << V << std::endl; + std::cout << "U:\n" << U << std::endl; + std::cout << "r:\n" << r << std::endl; + std::cout << "u:\n" << u << std::endl; + std::cout << "x:\n" << x << std::endl; + //std::cout<<"mat:\n "<1 + //This should be identity, since the columns of V are orthonormalized. + std::cout << "Check if the columns of V are orthonormalized" << std::endl; + std::cout << V.block(Nj, 0, N, S).adjoint()* V.block(Nj, 0, N, S) << std::endl; + #endif + if (break_normalization==false) + { + U = V; + } + + } + if (reset_while) + { + reset_while = false; + #if IDRSTAB_INF_NORM + tol_error = r.head(N).template lpNorm(); + #else + tol_error = r.head(N).norm(); + #endif + if (tol_error < tol2) + { + /* + Slightly early exit by moving the criterion before the update of U, + after the main while loop the result of that calculation would not be needed. + */ + break; + } + continue; + } + + //r=[r;mat*r_{L-1}] + //Save this in rHat, the storage form for rHat is more suitable for the argmin step than the way r is stored. + //In Eigen 3.4 this step can be compactly done via: rHat = r.reshaped(N, L + 1); + r.segment(N * L, N).noalias() = mat * precond.solve(r.segment(N * (L - 1), N)); + for (Index i = 0; i <= L; ++i) + { + rHat.col(i) = r.segment(N * i, N); + } + + /* + The polynomial step + */ + #if IDRSTAB_ACCURATE + qr_solver.compute(rHat.rightCols(L)); + gamma.noalias() = qr_solver.solve(r.head(N)); + #else + gamma.noalias() = (rHat.rightCols(L).adjoint() * rHat.rightCols(L)).llt().solve(rHat.rightCols(L).adjoint() * r.head(N)); + #endif + + //Update solution and residual using the "minimized residual coefficients" + update.noalias() = rHat.leftCols(L) * gamma; + x += update; + r.head(N) -= mat * precond.solve(update); + + //Update iteration info + ++k; + #if IDRSTAB_INF_NORM + tol_error = r.head(N).template lpNorm(); + #else + tol_error = r.head(N).norm(); + #endif + #if IDRSTAB_DEBUG_INFO > 0 + // if(apply_r_exit) + // { + // //u normalization failed, cannot continue with next iteration? + // iters = k; + // x = precond.solve(x); + // tol_error = tol_error / rhs_norm; + // return true; + // } + if(k % 10 == 0) + { + std::cout << "Current residual: " << tol_error / rhs_norm << " iter: " << k << std::endl; + } + #endif + if (tol_error < tol2) + { + //Slightly early exit by moving the criterion before the update of U, + //after the main while loop the result of that calculation would not be needed. + #if IDRSTAB_DEBUG_INFO >1 + std::cout << "tol_error break" << std::endl; + #endif + break; + } + #if IDRSTAB_ACCURATE + if (repair_flag==false && tol_error > 10 * residual_0) + { + //Sonneveld's repair flag suggestion from [5] + //This massively reduces problems with false residual estimates (if they'd occur) + repair_flag = true; + #if IDRSTAB_DEBUG_INFO > 0 + std::cout << "repair flag set true, iter: " << k << std::endl; + #endif + } + if (repair_flag && 1000 * tol_error < residual_0) + { + r.head(N) = rhs - mat * precond.solve(x); + repair_flag = false; + #if IDRSTAB_DEBUG_INFO > 0 + std::cout << "repair flag reset iter: " << k << std::endl; + #endif + } + #endif + + /* + U=U0-sum(gamma_j*U_j) + Consider the first iteration. Then U only contains U0, so at the start of the while-loop + U should be U0. Therefore only the first N rows of U have to be updated. + */ + for (Index i = 1; i <= L; ++i) + { + U.topRows(N) -= U.block(N * i, 0, N, S) * gamma(i - 1); + } + + } + + /* + Exit after the while loop terminated. + */ + iters = k; + x = precond.solve(x); + tol_error = tol_error / rhs_norm; + #if IDRSTAB_DEBUG_INFO >0 + std::cout << "Final exit" << std::endl; + std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration time_span = std::chrono::duration_cast> + (t2 - t1); + std::cout << "Solver time: " << time_span.count() << " seconds" << std::endl; + std::cout << "#iterations: " << k << std::endl; + std::cout << "Estimated relative residual: " << tol_error << std::endl; + #if IDRSTAB_INF_NORM + std::cout << "True relative residual: " << (mat * x - rhs).template lpNorm() / rhs.template lpNorm() << + std::endl; + #else + std::cout << "True relative residual: " << (mat * x - rhs).norm() / rhs.norm() << + std::endl; + #endif + + + #endif + + return true; + } + + } + + template< typename _MatrixType, + typename _Preconditioner = DiagonalPreconditioner > + class IDRStab; + + namespace internal + { + + template< typename _MatrixType, typename _Preconditioner> + struct traits > + { + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; + }; + + } + + template< typename _MatrixType, typename _Preconditioner> + class IDRStab : public IterativeSolverBase > + { + typedef IterativeSolverBase Base; + using Base::matrix; + using Base::m_error; + using Base::m_iterations; + using Base::m_info; + using Base::m_isInitialized; + Index m_L = 2; + Index m_S = 4; + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + + public: + + /** Default constructor. */ + IDRStab() : Base() {} + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + + This constructor is a shortcut for the default constructor followed + by a call to compute(). + + \warning this class stores a reference to the matrix A as well as some + precomputed values that depend on it. Therefore, if \a A is changed + this class becomes invalid. Call compute() to update it with the new + matrix A, or modify a copy of A. + */ + template + explicit IDRStab(const EigenBase& A) : Base(A.derived()) {} + + ~IDRStab() {} + + /** \internal */ + /** Loops over the number of columns of b and does the following: + 1. Sets the tolerance and maxIterations + 2. Calls the function that has the core solver routine + */ + // template + // void _solve_with_guess_impl(const Rhs& b, Dest& x) const + // { + // _solve_vector_with_guess_impl(b, x); + // } + // using Base::_solve_impl; + // template + // void _solve_impl(const MatrixBase& b, Dest& x) const + // { + + // x.resize(this->rows(), b.cols()); + // x.setZero(); + // //x.setRandom(); + // //TODO:This may break if b contains multiple columns, but is probably better than choosing zeros. + // //Random is more reliable than zeros, one can find cases where MATLAB's bicgstabl does not converge with a zero guess either, but does with random. + // //Unit tests pass more often with random compared to zero. + // //x = Base::m_preconditioner.solve(b); + // // for (Index i = 0; i < b.cols(); ++i) + // // { + // // x.col(i) = Base::m_preconditioner.solve(b.col(i)); + // // } + // _solve_with_guess_impl(b, x); + // } + + template + void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const + { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + bool ret = internal::idrstab(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, + m_L, m_S); + + m_info = (!ret) ? NumericalIssue + : m_error <= 10 * Base::m_tolerance ? Success + : NoConvergence; + + #if IDRSTAB_DEBUG_INFO >0 + std::cout << "Matrix size: " << b.rows() << std::endl; + std::cout << "ret: " << ret << std::endl; + std::cout << "m_error: " << m_error << std::endl; + std::cout << "Base::m_tolerance: " << Base::m_tolerance << std::endl; + std::cout << "m_info: " << m_info << std::endl; + if (m_info != Success) + { + if (m_error < Base::m_tolerance * 10) + { + std::cout << "Unsatisfactory abort" << std::endl; + } + else + { + std::cout << "Legitimate abort" << std::endl; + } + + } + #endif + #if SAVE_FAILS + #if IDRSTAB_INF_NORM + if ((b - matrix()*x).template lpNorm() / b.template lpNorm() > 1e-6) + #else + if ((b - matrix()*x).norm() / b.norm() > 1e-6) + #endif + { + #if IDRSTAB_INF_NORM + std::cout << "True residual bad: " << (b - matrix()*x).template lpNorm() / b.template lpNorm() << std::endl; + #else + std::cout << "True residual bad: " << (b - matrix()*x).norm() / b.norm() << std::endl; + #endif + using namespace std::chrono; + typedef typename Dest::Scalar Scalar; + typedef Matrix VectorType; + int64_t timestamp = duration_cast(system_clock::now().time_since_epoch()).count(); + Eigen::SparseMatrix A = matrix(); + Eigen::SparseLU, Eigen::COLAMDOrdering > solver; + // fill A and b; + // Compute the ordering permutation vector from the structural pattern of A + solver.analyzePattern(A); + // Compute the numerical factorization + solver.factorize(A); + //Use the factors to solve the linear system + VectorType x_LU = solver.solve(b); + Eigen::saveMarket(A, "A_" + std::to_string(timestamp)); + Eigen::saveMarketVector(b, "b_" + std::to_string(timestamp)); + Eigen::saveMarketVector(x_LU, "x_LU_" + std::to_string(timestamp)); + Eigen::saveMarketVector(x, "x_IDRSTAB_" + std::to_string(timestamp)); + std::cout << "Segfault: to trigger the debugger" << std::endl; + //*(char*)0 = 0; + } + #endif + + } + + /** \internal */ + //TODO: Should setL and setS be supported? Or should the defaults L=2,S=4 be adopted? + void setL(Index L) + { + //Truncate to even number + L = (L / 2) * 2; + + //L must be positive + L = L < 1 ? 2 : L; + + //L may not exceed 8 + L = L > 8 ? 8 : L; + + m_L = L == 6 ? 4 : L; + } + void setS(Index S) + { + //Truncate to even number + S = (S / 2) * 2; + + //S must be positive + S = S < 1 ? 2 : S; + + //S may not exceed 8 + S = S > 8 ? 8 : S; + + m_S = S == 6 ? 4 : S; + } + + protected: + + }; + +} + +#endif /* idrstab_h */ diff --git a/test/.directory b/test/.directory new file mode 100644 index 000000000..db32f0b2f --- /dev/null +++ b/test/.directory @@ -0,0 +1,4 @@ +[Dolphin] +Timestamp=2020,2,25,15,45,3 +Version=4 +ViewMode=1 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d6ef17a70..49bba1847 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -265,6 +265,7 @@ ei_add_test(simplicial_cholesky) ei_add_test(conjugate_gradient) ei_add_test(incomplete_cholesky) ei_add_test(bicgstab) +ei_add_test(idrstab) ei_add_test(lscg) ei_add_test(sparselu) ei_add_test(sparseqr) @@ -388,9 +389,9 @@ if(EIGEN_TEST_CUDA) find_package(CUDA 5.0) if(CUDA_FOUND) - + set(CUDA_PROPAGATE_HOST_FLAGS OFF) - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) endif() if(EIGEN_TEST_CUDA_CLANG) @@ -401,9 +402,9 @@ if(CUDA_FOUND) endforeach() endif() set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") - + ei_add_test(gpu_basic) - + unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) endif() @@ -418,8 +419,8 @@ if (EIGEN_TEST_HIP) set(HIP_PATH "/opt/rocm/hip" CACHE STRING "Path to the HIP installation.") if (EXISTS ${HIP_PATH}) - - list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake) + + list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake) find_package(HIP REQUIRED) if (HIP_FOUND) @@ -433,21 +434,21 @@ if (EIGEN_TEST_HIP) set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") ei_add_test(gpu_basic) unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) - + elseif (${HIP_PLATFORM} STREQUAL "nvcc") message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen") else () message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}") endif() - + endif() - + else () message(FATAL_ERROR "EIGEN_TEST_HIP is ON, but the specified HIP_PATH (${HIP_PATH}) does not exist") - + endif() - + endif() option(EIGEN_TEST_BUILD_DOCUMENTATION "Test building the doxygen documentation" OFF) diff --git a/test/idrstab.cpp b/test/idrstab.cpp new file mode 100644 index 000000000..6d2ac681d --- /dev/null +++ b/test/idrstab.cpp @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse_solver.h" +#include + +template void test_idrstab_T() +{ + int L = 2; + int S = 4; + //for(int L=1; L<=8; L*=2) + { + //for(int S=1; S<=8; S*=2) + { + IDRStab, DiagonalPreconditioner > idrstab_colmajor_diag; + IDRStab, IdentityPreconditioner> idrstab_colmajor_I; + IDRStab, IncompleteLUT > idrstab_colmajor_ilut; + + idrstab_colmajor_diag.setL(L); + idrstab_colmajor_diag.setS(S); + idrstab_colmajor_I.setL(L); + idrstab_colmajor_I.setS(S); + idrstab_colmajor_ilut.setL(L); + idrstab_colmajor_ilut.setS(S); + + idrstab_colmajor_diag.setTolerance(NumTraits::epsilon() * 4); + idrstab_colmajor_I.setTolerance(NumTraits::epsilon() * 4); + idrstab_colmajor_ilut.setTolerance(NumTraits::epsilon() * 4); + + CALL_SUBTEST( check_sparse_square_solving(idrstab_colmajor_diag)); + CALL_SUBTEST( check_sparse_square_solving(idrstab_colmajor_I)); + CALL_SUBTEST( check_sparse_square_solving(idrstab_colmajor_ilut)); + } + } +} + +EIGEN_DECLARE_TEST(idrstab) +{ + CALL_SUBTEST_1((test_idrstab_T()) ); + CALL_SUBTEST_2((test_idrstab_T, int>())); + CALL_SUBTEST_3((test_idrstab_T())); +} -- GitLab From 047cc1cbed5be228124a529555a385b761a1be78 Mon Sep 17 00:00:00 2001 From: jwehner Date: Mon, 4 May 2020 16:13:31 +0200 Subject: [PATCH 02/18] deleted .directory file from repo --- test/.directory | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 test/.directory diff --git a/test/.directory b/test/.directory deleted file mode 100644 index db32f0b2f..000000000 --- a/test/.directory +++ /dev/null @@ -1,4 +0,0 @@ -[Dolphin] -Timestamp=2020,2,25,15,45,3 -Version=4 -ViewMode=1 -- GitLab From cda31fda4f94008fa349bbe3fba87988b06ec367 Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 7 May 2020 16:10:59 +0200 Subject: [PATCH 03/18] Added BiCGStab(L) solver the work in progress version --- Eigen/IterativeLinearSolvers | 1 + Eigen/src/IterativeLinearSolvers/BiCGSTABL.h | 684 +++++++++++++++++++ test/CMakeLists.txt | 1 + test/bicgstabl.cpp | 34 + 4 files changed, 720 insertions(+) create mode 100755 Eigen/src/IterativeLinearSolvers/BiCGSTABL.h create mode 100644 test/bicgstabl.cpp diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers index 5d757e378..0320895ea 100644 --- a/Eigen/IterativeLinearSolvers +++ b/Eigen/IterativeLinearSolvers @@ -41,6 +41,7 @@ #include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" #include "src/IterativeLinearSolvers/BiCGSTAB.h" #include "src/IterativeLinearSolvers/IDRStab.h" +#include "src/IterativeLinearSolvers/BiCGSTABL.h" #include "src/IterativeLinearSolvers/IncompleteLUT.h" #include "src/IterativeLinearSolvers/IncompleteCholesky.h" diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h new file mode 100755 index 000000000..532931650 --- /dev/null +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h @@ -0,0 +1,684 @@ +/* + Chris Schoutrop & Adithya Vijaykumar + -working concept method on 27/02/2019 + -support for complex numbers + -converted to Eigen instead of std::vector + -BLAS2 operation for vector updates + -Right preconditioning + -fixed issue when preconditioner is "too good" and initial guess is zeros; + precond.solve(x) would always be zero. Setting x to random initially works. + There should be a more mathematical derivation on why and how for this though. + -used Eigen's QR decomposition to simplify MR part + -Added condition on L for small matrices (use L=length of x if L>length of x) + -Added fix if r0 becomes orthogonal to rShadow + -Added condition to stop computing if rho1 becomes nan/inf + -Added termination condition if residual is low enough during BiCG step. + -Removed fix if r0 becomes orthogonal to rShadow + -Added reliable updating strategy from enhanced bicgstabl paper + -Initial version of the convex combination from enhanced bicgstabl paper + -Added defines to switch between original, householder QR and convex combination methods for argmin (stab-step) step. + -Added define to switch between left and right preconditioning, note that right-preconditioning may not be compatible currently with reliable update step. + + + Right-preconditioning is applied here; Ax=b->AMinvu=b. + instead of solving for x, we solve for MInv*u + Then at the end x can be obtained via x=MInv*x + See Algorithm 9.5 from Iterative methods for sparse linear systems for inspiration. + + Known issues: + -See TODO's + -If L is too large (~20) instabilities occur. + -Residual gap can be significant (1e3 or more) if 1000+ iterations are needed. This is likely an algorithmic issue, not implementation issue. + In case of IDR(s)Stab(L) this is shown in the paper of Kensuke. + => To fix this, consider the variant of BiCGStab(L) given in Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving + linear systems of equations. Universiteit Utrecht. Mathematisch Instituut, 1996. + + This implementation of BiCGStab(L) is based on the papers + General algorithm: + 1. G.L.G. Sleijpen, D.R. Fokkema. (1993). BiCGstab(l) for linear equations involving unsymmetric matrices with complex spectrum. Electronic Transactions on Numerical Analysis. + Polynomial step update: + 2. G.L.G. Sleijpen, M.B. Van Gijzen. (2010) Exploiting BiCGstab(l) strategies to induce dimension reduction SIAM Journal on Scientific Computing. + 3. Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving linear systems of equations. Universiteit Utrecht. Mathematisch Instituut, 1996 +*/ + +#ifndef BiCGSTABL_h +#define BiCGSTABL_h + +#include +#include +#include +#include + +#define BICGSTABL_IN_LIBRARY 1 //1 is needed to have bicgstab(L) compile with Eigen, as part of the library for the unit tests. 0 to have it as a plugin for the test suite and in Plasimo. +//TODO: Figure out a better fix. + +#define BICGSTABL_REL_UPDATE 1 //0=no reliable update strategy, 1=reliable update strategy +#define BICGSTABL_ALGORITHM 0 //0: original, 1: Householder QR, 2: Convex polynomial method +#define BICGSTABL_PRECOND 1 //0: right, 1: left. 0 may be broken, 1 is checked against Fortrain implementation of Enhanced bicgstabl +/* + Technically it is possible to change these strategies at runtime, however this would also require the user (or some sort of automated criterion) to know why and when certain methods are best. + Currently this is done via defines to eliminate any possible contamination between the methods. +*/ +#define BICGSTABL_DEBUG_INFO 0 //Print info to console about the problem being solved. + +#if BICGSTABL_DEBUG_INFO +#include +#endif +namespace Eigen +{ + + namespace internal + { + + template + bool BiCGSTABL(const MatrixType& mat, const Rhs& rhs, Dest& x, + const Preconditioner& precond, Index& iters, + typename Dest::RealScalar& tol_error, Index L) + { + #if BICGSTABL_DEBUG_INFO + std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); + std::cout << "Matrix size: " << mat.rows() << std::endl; + #endif + using std::sqrt; + using std::abs; + typedef typename Dest::RealScalar RealScalar; + typedef typename Dest::Scalar Scalar; + Index N = rhs.size(); + L = L < x.rows() ? L : x.rows(); + + Index k = 0; + + RealScalar tol = tol_error; + Index maxIters = iters; + + typedef Matrix VectorType; + typedef Matrix DenseMatrixType; + + //We start with an initial guess x_0 and let us set r_0 as (residual calculated from x_0) + #if BICGSTABL_PRECOND==0 + VectorType r0 = rhs - precond.solve(mat * x); //r_0 + #elif BICGSTABL_PRECOND==1 + VectorType r0 = precond.solve(rhs - mat * x); //r_0 + #endif + + //rShadow is arbritary, but must not be orthogonal to r0. + VectorType rShadow = r0; + + //RealScalar rShadow_sqnorm = rShadow.squaredNorm(); + //RealScalar rhs_sqnorm = rhs.squaredNorm(); + + VectorType x_prime = x; + x.setZero(); + VectorType b_prime = r0; + + //Other vectors and scalars initialization + Scalar rho0 = 1.0; + Scalar alpha = 0.0; + Scalar omega = 1.0; + + //RealScalar tol2 = tol * tol * rShadow_sqnorm; + //RealScalar eps2 = NumTraits::epsilon() * NumTraits::epsilon(); + + DenseMatrixType rHat(N, L + 1); + DenseMatrixType uHat(N, L + 1); + + rHat.col(0) = r0; + uHat.col(0).fill(0.0); + + //Index restarts = 0; + bool bicg_convergence = false; + + //RealScalar zeta0 = r0.norm(); + RealScalar zeta0 = rhs.norm(); + RealScalar zeta = zeta0; + RealScalar Mx = zeta0; + RealScalar Mr = zeta0; + + const RealScalar delta = 0.01; + + bool compute_res = false; + bool update_app = false; + + //Bool to signal that a new rShadow was chosen and the main loop should be restarted. + //bool reset = false; + + while ( zeta > tol * zeta0 && k < maxIters ) + { + rho0 = -omega * rho0; + + for (Index j = 0; j <= L - 1; ++j) + { + Scalar rho1 = rShadow.dot(rHat.col(j)); + + // if (abs(rho1) < eps2 * rShadow_sqnorm) + // { + // // The new residual vector became too orthogonal to the arbitrarily chosen direction rShadow + // // Let's restart with a new rShadow + // std::cout << "Orthogonality reset triggered" << std::endl; + + // if (k != 0) + // { + // //Choose a new rShadow based on the current solution + // r0 = precond.solve(rhs - mat * x); + // rShadow = r0; + // rho1 = rShadow_sqnorm = rShadow.squaredNorm(); + // rHat.col(0) = r0; + // uHat.col(0).fill(0.0); + // } + // else + // { + // //During the first iteration there is no reason to reset, as nothing would change. + // //Might as well try with a random rShadow instead, as it will be very unlikely to be orthogonal to any vector. + // rShadow.setRandom(); + // } + + // if (restarts++ == 0) + // { + // k = 0; + // } + + // reset = true; + // break; + // } + + if (rho1 - rho1 != 0.0 || rho0 == 0.0) + { + std::cout << "Internal BiCGStab(L) parameter became unfit to continue computing" << std::endl; + tol_error = zeta / zeta0; + return false; + } + + Scalar beta = alpha * (rho1 / rho0); + rho0 = rho1; + + //Update search directions + for (Index i = 0; i <= j; ++i) + { + + uHat.col(i) = rHat.col(i) - beta * uHat.col(i); + } + + #if BICGSTABL_PRECOND==0 + uHat.col(j + 1) = mat * precond.solve(uHat.col(j)); + #elif BICGSTABL_PRECOND==1 + uHat.col(j + 1) = precond.solve(mat * uHat.col(j)); + #endif + alpha = rho1 / (rShadow.dot(uHat.col(j + 1))); + + //Update residuals + for (Index i = 0; i <= j; ++i) + { + rHat.col(i) = rHat.col(i) - alpha * uHat.col(i + 1); + } + + #if BICGSTABL_PRECOND==0 + rHat.col(j + 1) = mat * precond.solve(rHat.col(j)); + #elif BICGSTABL_PRECOND==1 + rHat.col(j + 1) = precond.solve(mat * rHat.col(j)); + #endif + + //Complete BiCG iteration by updating x + x = x + alpha * uHat.col(0); + + //Check for early exit + zeta = rHat.col(0).norm(); + + if (zeta < tol * zeta0) + { + /* + Convergence was achieved during BiCG step. + Without this check BiCGStab(L) fails for trivial matrices, such as when the preconditioner already is the inverse, + or the input matrix is identity. + */ + bicg_convergence = true; + break; + } + } + + #if BICGSTABL_ALGORITHM==0 + + if (bicg_convergence == false) + { + DenseMatrixType tau(L + 1, L + 1); + VectorType sigma(L + 1); + VectorType gamma(L + 1); + VectorType gammaP(L + 1); + VectorType gammaPP(L); + /* + //TODO: Currently the first elements of tau,gamma, gammaP and gammaPP are never used + these do take up (a small amount of) memory. Changing indices to 0-based leads to deviations in notation with the Bicgstabl paper + and may be confusing. + */ + + for (Index j = 1; j <= L; ++j) + { + for (Index i = 1; i <= j - 1; ++i) + { + tau(i, j) = (rHat.col(i).dot(rHat.col(j))) / sigma(i); + rHat.col(j) -= tau(i, j) * rHat.col(i); + } + + sigma(j) = rHat.col(j).squaredNorm(); + gammaP(j) = (rHat.col(j).dot(rHat.col(0))) / sigma(j); + + } + + gamma(L) = gammaP(L); + omega = gamma(L); + + //TODO: store tau as triangular, perform backsubstitution with solve + //instead of explicit summation. + for (Index j = L - 1; j >= 1; --j) + { + Scalar sum = 0.0; + + for (Index i = j + 1; i <= L; ++i) + { + sum += tau(j, i) * gamma(i); + } + + gamma(j) = gammaP(j) - sum; + } + + + //TODO: See if this can be done without nested loops and explicit sum + for (Index j = 1; j <= L - 1; ++j) + { + Scalar sum = 0.0; + + for (Index i = j + 1; i <= L - 1; ++i) + { + sum += tau(j, i) * gamma(i + 1); + } + + gammaPP(j) = gamma(j + 1) + sum; + } + + x += gamma(1) * rHat.col(0); + rHat.col(0) -= gammaP(L) * rHat.col(L); + uHat.col(0) -= gamma(L) * uHat.col(L); + + x += rHat.block(0, 1, N, L - 1) * gammaPP.segment(1, L - 1); + uHat.col(0) -= uHat.block(0, 1, N, L - 1) * gamma.segment(1, L - 1); + rHat.col(0) -= rHat.block(0, 1, N, L - 1) * gammaP.segment(1, L - 1); + + } + + #elif BICGSTABL_ALGORITHM==1 + + if (bicg_convergence == false) //(L == 1) + { + /* + //TODO: Compare this with the Fortran code step by step in the Enhanced bicgstabL paper, before any hard conclusions are drawn about this section. + */ + /* + The polynomial/minimize residual step. + + QR Householder method for argmin is more stable than (modified) Gram-Schmidt, in the sense that there is less loss of orthogonality. + It is more accurate than solving the normal equations, since the normal equations scale with condition number squared. + + This method can also be used if L>1, however the paper of Fokkema recommends the convex combination method to maintain convergence. + */ + VectorType gamma(L); + // gamma = ((rHat.block(0, 1, N, L)).adjoint() * (rHat.block(0, 1, N, + // L))).llt().solve((rHat.block(0, 1, N, L)).adjoint() * rHat.col(0)); + + // DenseMatrixType Z(L, L); + + // for (Index i = 0; i < L; ++i) + // { + // for (Index j = 0; j <= i; ++j) + // { + // Z(i, j) = rHat.col(i + 1).dot(rHat.col(j + 1)); + + // } + + // } + + //Z = (Z.template selfadjointView()); + + //gamma = Z.llt().solve(Z.col(0)); + //gamma = Z.llt().solve(rHat.block(0, 1, N, L).adjoint() * rHat.col(0)); + gamma = (rHat.block(0, 1, N, L)).householderQr().solve(rHat.col(0)); + x += rHat.block(0, 0, N, L) * gamma; + rHat.col(0) -= rHat.block(0, 1, N, L) * gamma; + uHat.col(0) -= uHat.block(0, 1, N, L) * gamma; + zeta = rHat.col(0).norm(); + omega = gamma(L - 1); + } + + #elif BICGSTABL_ALGORITHM==2 + + if (L != 1 && bicg_convergence == false) //else + { + //TODO: ADD case for L==1, refer to householder method, or analyitical bicgstab method + /* + Convex combination step + + This takes a combination of Minimum and Orthogonal residual polynomials, + resulting in a more stable determination of BiCG coefficients [Fokkema]. + */ + DenseMatrixType Z(L + 1, L + 1); + + //Z = rHat.adjoint() * rHat; + for (Index i = 0; i <= L; ++i) + { + for (Index j = 0; j <= i; ++j) + { + Z(i, j) = rHat.col(i).dot(rHat.col(j)); + + } + + } + + // //Z is Hermitian, therefore only one half has to be computed, the other half can be filled in, saving several DOTs. + Z = (Z.template selfadjointView()); + DenseMatrixType Z0 = Z.block(1, 1, L - 1, + L - 1); //Copy to ensure there is no in place decomposition taking place + //Z = (rHat.block(0, 1, N, L)).transpose() * rHat.block(0, 1, N, L); + //Z = rHat.adjoint() * rHat; + //TODO: strictly upper/lower doesnt work, so this is performing some unneeded work. + + //Determine the coefficients for the polynomials. + VectorType y0(L + 1); + VectorType yL(L + 1); + y0(0) = -1.0; + y0(L) = 0.0; + yL(0) = 0.0; + yL(L) = -1.0; + + //The block of Z is used for both y0 and yL, so the decomposition can be recycled. + //PartialPivLU lu_decomposition(Z.block(1, 1, L - 1, L - 1)); + LLT lu_decomposition(Z0); + y0.block(1, 0, L - 1, 1) = lu_decomposition.solve(Z.block(1, 0, L - 1, 1)); + yL.block(1, 0, L - 1, 1) = lu_decomposition.solve(Z.block(1, L, L - 1, 1)); + + //y0.block(1, 0, L - 1, 1) = + //yL.block(1, 0, L - 1, 1) = //Dit toch gwn met householder doen ipv normal eqns + + Scalar kappa0 = y0.adjoint() * (Z * y0); + Scalar kappaL = yL.adjoint() * (Z * yL); + kappa0 = sqrt(kappa0); + kappaL = sqrt(kappaL); + + //Combine the MR and OR versions. + Scalar rho = yL.adjoint() * (Z * y0); + rho = rho / (kappa0 * kappaL); + RealScalar abs_rho = abs(rho); + Scalar gamma_hat = (rho / abs_rho) * (abs_rho > 0.7 ? abs_rho : 0.7); + y0 = y0 - gamma_hat * (kappa0 / kappaL) * yL; + + //The norm of the residual can computed be more efficiently from y0 and Z than from rHat.col(0).norm(). + rho = y0.adjoint() * (Z * y0); + zeta = sqrt(abs(rho)); + + //if (bicg_convergence && zeta - zeta != 0) + if (zeta - zeta != 0) + { + /* + Convergence was achieved during BiCG step, this leads to extremely small values of rHat, and Z. + As a result the polynomial step can break down, however x is likely to still be usable. + */ + #if BICGSTABL_DEBUG_INFO + std::cout << "zeta breakdown" << std::endl; + //std::cout << "rHat: \n" << rHat << std::endl; + //std::cout << "x: \n" << x << std::endl; + //std::cout << "uHat: \n" << uHat << std::endl; + std::cout << "y0: \n" << y0 << std::endl; + std::cout << "yL: \n" << yL << std::endl; + std::cout << "Z: \n" << Z << std::endl; + std::cout << "zeta: \n" << zeta << std::endl; + #endif + zeta = rHat.col(0).norm(); + break; + + } + else + { + //Update + omega = y0(L); + uHat.col(0) -= uHat.block(0, 1, N, L) * y0.block(1, 0, L, 1); + x += rHat.block(0, 0, N, L) * y0.block(1, 0, L, 1); + rHat.col(0) -= rHat.block(0, 1, N, L) * y0.block(1, 0, L, 1); + } + + + + } + + #endif + + + //TODO: Duplicate update code can be removed for the L=1 and L!=1 case. + //TODO: Use analytical expression instead of householder for L=1. + k += L; + //k += 1; + + /* + Reliable update part + + The recursively computed residual can deviate from the actual residual after several iterations. However, computing the + residual from the definition costs extra MVs and should not be done at each iteration. + The reliable update strategy computes the true residual from the definition: r=b-A*x at strategic intervals. + Furthermore a "group wise update" strategy is used to combine updates, which improves accuracy. + */ + + #if BICGSTABL_REL_UPDATE + Mx = std::max(Mx, zeta); //Maximum norm of residuals since last update of x. + Mr = std::max(Mr, zeta); //Maximum norm of residuals since last computation of the true residual. + + if (zeta < delta * zeta0 && zeta0 <= Mx) + { + update_app = true; + } + + if (update_app || (zeta < delta * Mr && zeta0 <= Mr)) + { + compute_res = true; + } + + if (bicg_convergence) + { + update_app = true; + compute_res = true; + bicg_convergence = false; + } + + if (compute_res) + { + #if BICGSTABL_PRECOND==0 + rHat.col(0) = b_prime - precond.solve(mat * x); + #elif BICGSTABL_PRECOND==1 + //rHat.col(0) = b_prime - mat * x; //Fokkema paper pseudocode + //Fokkema paper Fortan code L250-254 + rHat.col(0) = mat * x; + rHat.col(0) = precond.solve(rHat.col(0)); + rHat.col(0) = b_prime - rHat.col(0); + #endif + + zeta = rHat.col(0).norm(); + Mr = zeta; + + if (update_app) + { + //After the group wise update, the original problem is translated to a shifted one. + x_prime = x_prime + x; + x.setZero(); + b_prime = rHat.col(0); + Mx = zeta; + } + } + + + compute_res = false; + update_app = false; + #endif + #if BICGSTABL_DEBUG_INFO + std::cout << "k: " << k << "res:" << zeta / zeta0 << + std::endl; + #endif + } + + //Convert internal variable to the true solution vector x + x = x_prime + x; + #if BICGSTABL_PRECOND==0 + x = precond.solve(x); + #endif + tol_error = zeta / zeta0; + //tol_error = zeta; + //tol_error = sqrt(rHat.col(0).squaredNorm() / rhs_sqnorm); + //tol_error = (mat * x - rhs).norm() / rhs.norm(); + iters = k; + + #if BICGSTABL_DEBUG_INFO + //Print experimental info + std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); + std::chrono::duration time_span = std::chrono::duration_cast> + (t2 - t1); + std::cout << "Solver time: " << time_span.count() << " seconds" << std::endl; + std::cout << "#iterations: " << k << std::endl; + std::cout << "Estimated error: " << tol_error << std::endl; + std::cout << "True error: " << (mat * x - rhs).norm() / rhs.norm(); + #endif + return true; + } + + } + + template< typename _MatrixType, + typename _Preconditioner = DiagonalPreconditioner > + class BiCGSTABL; + + namespace internal + { + + template< typename _MatrixType, typename _Preconditioner> + struct traits > + { + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; + }; + + } + + template< typename _MatrixType, typename _Preconditioner> + class BiCGSTABL : public IterativeSolverBase > + { + typedef IterativeSolverBase Base; + using Base::matrix; + using Base::m_error; + using Base::m_iterations; + using Base::m_info; + using Base::m_isInitialized; + Index m_L = 2; + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + + public: + + /** Default constructor. */ + BiCGSTABL() : Base() {} + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + + This constructor is a shortcut for the default constructor followed + by a call to compute(). + + \warning this class stores a reference to the matrix A as well as some + precomputed values that depend on it. Therefore, if \a A is changed + this class becomes invalid. Call compute() to update it with the new + matrix A, or modify a copy of A. + */ + template + explicit BiCGSTABL(const EigenBase& A) : Base(A.derived()) {} + + ~BiCGSTABL() {} + + /** \internal */ + /** Loops over the number of columns of b and does the following: + 1. sets the tolerence and maxIterations + 2. Calls the function that has the core solver routine + */ + #if BICGSTABL_IN_LIBRARY==0 + template + void _solve_with_guess_impl(const Rhs& b, Dest& x) const + { + _solve_vector_with_guess_impl(b, x); + } + #endif + + template + void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const + { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + + bool ret = internal::BiCGSTABL(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, + m_L); + #if BICGSTABL_DEBUG_INFO + std::cout << "ret: " << ret << std::endl; + std::cout << "m_error: " << m_error << std::endl; + std::cout << "Base::m_tolerance: " << Base::m_tolerance << std::endl; + #endif + + if (ret == false) + { + m_info = NumericalIssue; + x.setZero(); //x=nan does not pass Eigen's tests even if m_info=NumericalIssue :) + m_error = ((std::isfinite)(m_error) ? m_error : 1.0); + } + else + { + m_info = (m_error <= Base::m_tolerance) ? Success + : NoConvergence; + } + + // m_info = (!ret) ? NumericalIssue + // : m_error <= Base::m_tolerance ? Success + // : NoConvergence; + //m_info=NumericalIssue; + #if BICGSTABL_DEBUG_INFO + std::cout << "m_error_returned: " << m_error << std::endl; + std::cout << "m_info: " << m_info << std::endl; + #endif + // m_info = (!ret) ? NumericalIssue + // : m_error <= Base::m_tolerance ? Success + // : NoConvergence; + m_isInitialized = true; + } + + /** \internal */ + /** Resizes the x vector to match the dimenstion of b and sets the elements to zero*/ + #if BICGSTABL_IN_LIBRARY==0 + using Base::_solve_impl; + template + void _solve_impl(const MatrixBase& b, Dest& x) const + { + + x.resize(this->rows(), b.cols()); + x.setZero(); + + _solve_with_guess_impl(b, x); + } + #endif + void setL(Index L) + { + if (L < 1) + { + L = 2; + } + + m_L = L; + } + + protected: + + }; + +} + +#endif /* BiCGSTABL_h */ + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 49bba1847..d1110cf2d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -265,6 +265,7 @@ ei_add_test(simplicial_cholesky) ei_add_test(conjugate_gradient) ei_add_test(incomplete_cholesky) ei_add_test(bicgstab) +ei_add_test(bicgstabl) ei_add_test(idrstab) ei_add_test(lscg) ei_add_test(sparselu) diff --git a/test/bicgstabl.cpp b/test/bicgstabl.cpp new file mode 100644 index 000000000..7073f32ec --- /dev/null +++ b/test/bicgstabl.cpp @@ -0,0 +1,34 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "sparse_solver.h" +#include + +template void test_BiCGSTABL_T() +{ + + BiCGSTABL, DiagonalPreconditioner > BiCGSTABL_diag; + BiCGSTABL, IdentityPreconditioner> BiCGSTABL_I; + BiCGSTABL, IncompleteLUT > BiCGSTABL_ilut; + + BiCGSTABL_diag.setTolerance(NumTraits::epsilon() * 4); + BiCGSTABL_I.setTolerance(NumTraits::epsilon() * 4); + BiCGSTABL_ilut.setTolerance(NumTraits::epsilon() * 4); + + CALL_SUBTEST( check_sparse_square_solving(BiCGSTABL_diag)); + CALL_SUBTEST( check_sparse_square_solving(BiCGSTABL_I)); + CALL_SUBTEST( check_sparse_square_solving(BiCGSTABL_ilut)); +} + +EIGEN_DECLARE_TEST(bicgstabl) +{ + CALL_SUBTEST_1((test_BiCGSTABL_T()) ); + CALL_SUBTEST_2((test_BiCGSTABL_T, int>())); + CALL_SUBTEST_3((test_BiCGSTABL_T())); +} -- GitLab From e7d855ce6165ee0662d91a5505f4b3b464fdffec Mon Sep 17 00:00:00 2001 From: jwehner Date: Fri, 8 May 2020 16:41:21 +0200 Subject: [PATCH 04/18] compiles now --- Eigen/src/IterativeLinearSolvers/BiCGSTABL.h | 316 +++++++++---------- Eigen/src/IterativeLinearSolvers/IDRStab.h | 4 +- 2 files changed, 155 insertions(+), 165 deletions(-) diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h index 532931650..e4e67b84b 100755 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h @@ -41,25 +41,26 @@ 3. Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving linear systems of equations. Universiteit Utrecht. Mathematisch Instituut, 1996 */ -#ifndef BiCGSTABL_h -#define BiCGSTABL_h +#ifndef EIGEN_BICGSTABL_H +#define EIGEN_BICGSTABL_H #include #include #include #include +#include #define BICGSTABL_IN_LIBRARY 1 //1 is needed to have bicgstab(L) compile with Eigen, as part of the library for the unit tests. 0 to have it as a plugin for the test suite and in Plasimo. //TODO: Figure out a better fix. -#define BICGSTABL_REL_UPDATE 1 //0=no reliable update strategy, 1=reliable update strategy -#define BICGSTABL_ALGORITHM 0 //0: original, 1: Householder QR, 2: Convex polynomial method -#define BICGSTABL_PRECOND 1 //0: right, 1: left. 0 may be broken, 1 is checked against Fortrain implementation of Enhanced bicgstabl +#define BICGSTABL_REL_UPDATE 1 //0=no reliable update strategy, 1=reliable update strategy +#define BICGSTABL_ALGORITHM 0 //0: original, 1: Householder QR, 2: Convex polynomial method +#define BICGSTABL_PRECOND 1 //0: right, 1: left. 0 may be broken, 1 is checked against Fortrain implementation of Enhanced bicgstabl /* Technically it is possible to change these strategies at runtime, however this would also require the user (or some sort of automated criterion) to know why and when certain methods are best. Currently this is done via defines to eliminate any possible contamination between the methods. */ -#define BICGSTABL_DEBUG_INFO 0 //Print info to console about the problem being solved. +#define BICGSTABL_DEBUG_INFO 0 //Print info to console about the problem being solved. #if BICGSTABL_DEBUG_INFO #include @@ -70,17 +71,17 @@ namespace Eigen namespace internal { - template - bool BiCGSTABL(const MatrixType& mat, const Rhs& rhs, Dest& x, - const Preconditioner& precond, Index& iters, - typename Dest::RealScalar& tol_error, Index L) + template + bool BiCGSTABL(const MatrixType &mat, const Rhs &rhs, Dest &x, + const Preconditioner &precond, Index &iters, + typename Dest::RealScalar &tol_error, Index L) { - #if BICGSTABL_DEBUG_INFO +#if BICGSTABL_DEBUG_INFO std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); std::cout << "Matrix size: " << mat.rows() << std::endl; - #endif - using std::sqrt; +#endif using std::abs; + using std::sqrt; typedef typename Dest::RealScalar RealScalar; typedef typename Dest::Scalar Scalar; Index N = rhs.size(); @@ -94,12 +95,12 @@ namespace Eigen typedef Matrix VectorType; typedef Matrix DenseMatrixType; - //We start with an initial guess x_0 and let us set r_0 as (residual calculated from x_0) - #if BICGSTABL_PRECOND==0 - VectorType r0 = rhs - precond.solve(mat * x); //r_0 - #elif BICGSTABL_PRECOND==1 - VectorType r0 = precond.solve(rhs - mat * x); //r_0 - #endif +//We start with an initial guess x_0 and let us set r_0 as (residual calculated from x_0) +#if BICGSTABL_PRECOND == 0 + VectorType r0 = rhs - precond.solve(mat * x); //r_0 +#elif BICGSTABL_PRECOND == 1 + VectorType r0 = precond.solve(rhs - mat * x); //r_0 +#endif //rShadow is arbritary, but must not be orthogonal to r0. VectorType rShadow = r0; @@ -142,7 +143,7 @@ namespace Eigen //Bool to signal that a new rShadow was chosen and the main loop should be restarted. //bool reset = false; - while ( zeta > tol * zeta0 && k < maxIters ) + while (zeta > tol * zeta0 && k < maxIters) { rho0 = -omega * rho0; @@ -198,11 +199,11 @@ namespace Eigen uHat.col(i) = rHat.col(i) - beta * uHat.col(i); } - #if BICGSTABL_PRECOND==0 +#if BICGSTABL_PRECOND == 0 uHat.col(j + 1) = mat * precond.solve(uHat.col(j)); - #elif BICGSTABL_PRECOND==1 +#elif BICGSTABL_PRECOND == 1 uHat.col(j + 1) = precond.solve(mat * uHat.col(j)); - #endif +#endif alpha = rho1 / (rShadow.dot(uHat.col(j + 1))); //Update residuals @@ -211,11 +212,11 @@ namespace Eigen rHat.col(i) = rHat.col(i) - alpha * uHat.col(i + 1); } - #if BICGSTABL_PRECOND==0 - rHat.col(j + 1) = mat * precond.solve(rHat.col(j)); - #elif BICGSTABL_PRECOND==1 - rHat.col(j + 1) = precond.solve(mat * rHat.col(j)); - #endif +#if BICGSTABL_PRECOND == 0 + rHat.col(j + 1) = mat * precond.solve(rHat.col(j)); +#elif BICGSTABL_PRECOND == 1 + rHat.col(j + 1) = precond.solve(mat * rHat.col(j)); +#endif //Complete BiCG iteration by updating x x = x + alpha * uHat.col(0); @@ -235,7 +236,7 @@ namespace Eigen } } - #if BICGSTABL_ALGORITHM==0 +#if BICGSTABL_ALGORITHM == 0 if (bicg_convergence == false) { @@ -260,7 +261,6 @@ namespace Eigen sigma(j) = rHat.col(j).squaredNorm(); gammaP(j) = (rHat.col(j).dot(rHat.col(0))) / sigma(j); - } gamma(L) = gammaP(L); @@ -280,7 +280,6 @@ namespace Eigen gamma(j) = gammaP(j) - sum; } - //TODO: See if this can be done without nested loops and explicit sum for (Index j = 1; j <= L - 1; ++j) { @@ -301,10 +300,9 @@ namespace Eigen x += rHat.block(0, 1, N, L - 1) * gammaPP.segment(1, L - 1); uHat.col(0) -= uHat.block(0, 1, N, L - 1) * gamma.segment(1, L - 1); rHat.col(0) -= rHat.block(0, 1, N, L - 1) * gammaP.segment(1, L - 1); - } - #elif BICGSTABL_ALGORITHM==1 +#elif BICGSTABL_ALGORITHM == 1 if (bicg_convergence == false) //(L == 1) { @@ -347,7 +345,7 @@ namespace Eigen omega = gamma(L - 1); } - #elif BICGSTABL_ALGORITHM==2 +#elif BICGSTABL_ALGORITHM == 2 if (L != 1 && bicg_convergence == false) //else { @@ -366,15 +364,13 @@ namespace Eigen for (Index j = 0; j <= i; ++j) { Z(i, j) = rHat.col(i).dot(rHat.col(j)); - } - } // //Z is Hermitian, therefore only one half has to be computed, the other half can be filled in, saving several DOTs. Z = (Z.template selfadjointView()); DenseMatrixType Z0 = Z.block(1, 1, L - 1, - L - 1); //Copy to ensure there is no in place decomposition taking place + L - 1); //Copy to ensure there is no in place decomposition taking place //Z = (rHat.block(0, 1, N, L)).transpose() * rHat.block(0, 1, N, L); //Z = rHat.adjoint() * rHat; //TODO: strictly upper/lower doesnt work, so this is performing some unneeded work. @@ -415,23 +411,26 @@ namespace Eigen //if (bicg_convergence && zeta - zeta != 0) if (zeta - zeta != 0) { - /* +/* Convergence was achieved during BiCG step, this leads to extremely small values of rHat, and Z. As a result the polynomial step can break down, however x is likely to still be usable. */ - #if BICGSTABL_DEBUG_INFO +#if BICGSTABL_DEBUG_INFO std::cout << "zeta breakdown" << std::endl; //std::cout << "rHat: \n" << rHat << std::endl; //std::cout << "x: \n" << x << std::endl; //std::cout << "uHat: \n" << uHat << std::endl; - std::cout << "y0: \n" << y0 << std::endl; - std::cout << "yL: \n" << yL << std::endl; - std::cout << "Z: \n" << Z << std::endl; - std::cout << "zeta: \n" << zeta << std::endl; - #endif + std::cout << "y0: \n" + << y0 << std::endl; + std::cout << "yL: \n" + << yL << std::endl; + std::cout << "Z: \n" + << Z << std::endl; + std::cout << "zeta: \n" + << zeta << std::endl; +#endif zeta = rHat.col(0).norm(); break; - } else { @@ -441,13 +440,9 @@ namespace Eigen x += rHat.block(0, 0, N, L) * y0.block(1, 0, L, 1); rHat.col(0) -= rHat.block(0, 1, N, L) * y0.block(1, 0, L, 1); } - - - } - #endif - +#endif //TODO: Duplicate update code can be removed for the L=1 and L!=1 case. //TODO: Use analytical expression instead of householder for L=1. @@ -463,7 +458,7 @@ namespace Eigen Furthermore a "group wise update" strategy is used to combine updates, which improves accuracy. */ - #if BICGSTABL_REL_UPDATE +#if BICGSTABL_REL_UPDATE Mx = std::max(Mx, zeta); //Maximum norm of residuals since last update of x. Mr = std::max(Mr, zeta); //Maximum norm of residuals since last computation of the true residual. @@ -486,15 +481,15 @@ namespace Eigen if (compute_res) { - #if BICGSTABL_PRECOND==0 +#if BICGSTABL_PRECOND == 0 rHat.col(0) = b_prime - precond.solve(mat * x); - #elif BICGSTABL_PRECOND==1 +#elif BICGSTABL_PRECOND == 1 //rHat.col(0) = b_prime - mat * x; //Fokkema paper pseudocode //Fokkema paper Fortan code L250-254 rHat.col(0) = mat * x; rHat.col(0) = precond.solve(rHat.col(0)); rHat.col(0) = b_prime - rHat.col(0); - #endif +#endif zeta = rHat.col(0).norm(); Mr = zeta; @@ -509,80 +504,77 @@ namespace Eigen } } - compute_res = false; update_app = false; - #endif - #if BICGSTABL_DEBUG_INFO - std::cout << "k: " << k << "res:" << zeta / zeta0 << - std::endl; - #endif +#endif +#if BICGSTABL_DEBUG_INFO + std::cout << "k: " << k << "res:" << zeta / zeta0 << std::endl; +#endif } //Convert internal variable to the true solution vector x x = x_prime + x; - #if BICGSTABL_PRECOND==0 +#if BICGSTABL_PRECOND == 0 x = precond.solve(x); - #endif +#endif tol_error = zeta / zeta0; //tol_error = zeta; //tol_error = sqrt(rHat.col(0).squaredNorm() / rhs_sqnorm); //tol_error = (mat * x - rhs).norm() / rhs.norm(); iters = k; - #if BICGSTABL_DEBUG_INFO +#if BICGSTABL_DEBUG_INFO //Print experimental info std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); - std::chrono::duration time_span = std::chrono::duration_cast> - (t2 - t1); + std::chrono::duration time_span = std::chrono::duration_cast>(t2 - t1); std::cout << "Solver time: " << time_span.count() << " seconds" << std::endl; std::cout << "#iterations: " << k << std::endl; std::cout << "Estimated error: " << tol_error << std::endl; std::cout << "True error: " << (mat * x - rhs).norm() / rhs.norm(); - #endif +#endif return true; } - } + } // namespace internal - template< typename _MatrixType, - typename _Preconditioner = DiagonalPreconditioner > + template < typename _MatrixType, + typename _Preconditioner = DiagonalPreconditioner > class BiCGSTABL; namespace internal { - template< typename _MatrixType, typename _Preconditioner> - struct traits > + template + struct traits > { typedef _MatrixType MatrixType; typedef _Preconditioner Preconditioner; }; - } + } // namespace internal - template< typename _MatrixType, typename _Preconditioner> + template class BiCGSTABL : public IterativeSolverBase > { - typedef IterativeSolverBase Base; - using Base::matrix; - using Base::m_error; - using Base::m_iterations; - using Base::m_info; - using Base::m_isInitialized; - Index m_L = 2; - public: - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef _Preconditioner Preconditioner; - - public: - - /** Default constructor. */ - BiCGSTABL() : Base() {} - - /** Initialize the solver with matrix \a A for further \c Ax=b solving. + typedef IterativeSolverBase Base; + using Base::m_error; + using Base::m_info; + using Base::m_isInitialized; + using Base::m_iterations; + using Base::matrix; + Index m_L = 2; + + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + + public: + /** Default constructor. */ + BiCGSTABL() : Base() {} + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. This constructor is a shortcut for the default constructor followed by a call to compute(). @@ -592,93 +584,91 @@ namespace Eigen this class becomes invalid. Call compute() to update it with the new matrix A, or modify a copy of A. */ - template - explicit BiCGSTABL(const EigenBase& A) : Base(A.derived()) {} + template + explicit BiCGSTABL(const EigenBase &A) : Base(A.derived()) {} - ~BiCGSTABL() {} + ~BiCGSTABL() {} - /** \internal */ - /** Loops over the number of columns of b and does the following: +/** \internal */ +/** Loops over the number of columns of b and does the following: 1. sets the tolerence and maxIterations 2. Calls the function that has the core solver routine */ - #if BICGSTABL_IN_LIBRARY==0 - template - void _solve_with_guess_impl(const Rhs& b, Dest& x) const +#if BICGSTABL_IN_LIBRARY == 0 + template + void _solve_with_guess_impl(const Rhs &b, Dest &x) const + { + _solve_vector_with_guess_impl(b, x); + } +#endif + + template + void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const + { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + + bool ret = internal::BiCGSTABL(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, + m_L); +#if BICGSTABL_DEBUG_INFO + std::cout << "ret: " << ret << std::endl; + std::cout << "m_error: " << m_error << std::endl; + std::cout << "Base::m_tolerance: " << Base::m_tolerance << std::endl; +#endif + + if (ret == false) { - _solve_vector_with_guess_impl(b, x); + m_info = NumericalIssue; + x.setZero(); //x=nan does not pass Eigen's tests even if m_info=NumericalIssue :) + m_error = ((std::isfinite)(m_error) ? m_error : 1.0); } - #endif - - template - void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const + else { - m_iterations = Base::maxIterations(); - m_error = Base::m_tolerance; - - bool ret = internal::BiCGSTABL(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, - m_L); - #if BICGSTABL_DEBUG_INFO - std::cout << "ret: " << ret << std::endl; - std::cout << "m_error: " << m_error << std::endl; - std::cout << "Base::m_tolerance: " << Base::m_tolerance << std::endl; - #endif - - if (ret == false) - { - m_info = NumericalIssue; - x.setZero(); //x=nan does not pass Eigen's tests even if m_info=NumericalIssue :) - m_error = ((std::isfinite)(m_error) ? m_error : 1.0); - } - else - { - m_info = (m_error <= Base::m_tolerance) ? Success - : NoConvergence; - } - - // m_info = (!ret) ? NumericalIssue - // : m_error <= Base::m_tolerance ? Success - // : NoConvergence; - //m_info=NumericalIssue; - #if BICGSTABL_DEBUG_INFO - std::cout << "m_error_returned: " << m_error << std::endl; - std::cout << "m_info: " << m_info << std::endl; - #endif - // m_info = (!ret) ? NumericalIssue - // : m_error <= Base::m_tolerance ? Success - // : NoConvergence; - m_isInitialized = true; + m_info = (m_error <= Base::m_tolerance) ? Success + : NoConvergence; } - /** \internal */ - /** Resizes the x vector to match the dimenstion of b and sets the elements to zero*/ - #if BICGSTABL_IN_LIBRARY==0 - using Base::_solve_impl; - template - void _solve_impl(const MatrixBase& b, Dest& x) const - { +// m_info = (!ret) ? NumericalIssue +// : m_error <= Base::m_tolerance ? Success +// : NoConvergence; +//m_info=NumericalIssue; +#if BICGSTABL_DEBUG_INFO + std::cout << "m_error_returned: " << m_error << std::endl; + std::cout << "m_info: " << m_info << std::endl; +#endif + // m_info = (!ret) ? NumericalIssue + // : m_error <= Base::m_tolerance ? Success + // : NoConvergence; + m_isInitialized = true; + } - x.resize(this->rows(), b.cols()); - x.setZero(); +/** \internal */ +/** Resizes the x vector to match the dimenstion of b and sets the elements to zero*/ +#if BICGSTABL_IN_LIBRARY == 0 + using Base::_solve_impl; + template + void _solve_impl(const MatrixBase &b, Dest &x) const + { - _solve_with_guess_impl(b, x); - } - #endif - void setL(Index L) - { - if (L < 1) - { - L = 2; - } + x.resize(this->rows(), b.cols()); + x.setZero(); - m_L = L; + _solve_with_guess_impl(b, x); + } +#endif + void setL(Index L) + { + if (L < 1) + { + L = 2; } - protected: + m_L = L; + } + protected: }; -} - -#endif /* BiCGSTABL_h */ +} // namespace Eigen +#endif /* EIGEN_BICGSTABL_H */ diff --git a/Eigen/src/IterativeLinearSolvers/IDRStab.h b/Eigen/src/IterativeLinearSolvers/IDRStab.h index bb15747f5..cf137ae8b 100755 --- a/Eigen/src/IterativeLinearSolvers/IDRStab.h +++ b/Eigen/src/IterativeLinearSolvers/IDRStab.h @@ -197,7 +197,7 @@ namespace Eigen VectorType v = U.block(0, i, N, 1); //"How much do v and w have in common?" - h_FOM(i, q - 1) = v.adjoint() * w; + h_FOM(i, q - 1) = v.dot(w); //"Subtract the part they have in common" w = w - h_FOM(i, q - 1) * v; @@ -328,7 +328,7 @@ namespace Eigen //Pre-allocate sigma, this space will be recycled without additional allocations. DenseMatrixTypeCol sigma(S, S); - int rt_counter = k; //Iteration at which R_T was reset last + Index rt_counter = k; //Iteration at which R_T was reset last bool reset_while = false; //Should the while loop be reset for some reason? #if IDRSTAB_ACCURATE -- GitLab From 27584648f739381536aa7a5f8bbb88a33df70f56 Mon Sep 17 00:00:00 2001 From: jwehner Date: Fri, 15 May 2020 14:50:45 +0200 Subject: [PATCH 05/18] cleaned up BiCGSTABL.h --- Eigen/src/IterativeLinearSolvers/BiCGSTABL.h | 472 +++---------------- 1 file changed, 56 insertions(+), 416 deletions(-) diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h index e4e67b84b..d67ab48a8 100755 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h @@ -1,37 +1,15 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2020 Chris Schoutrop +// Copyright (C) 2020 Jens Wehner +// Copyright (C) 2020 Adithya Vijaykumar +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + /* - Chris Schoutrop & Adithya Vijaykumar - -working concept method on 27/02/2019 - -support for complex numbers - -converted to Eigen instead of std::vector - -BLAS2 operation for vector updates - -Right preconditioning - -fixed issue when preconditioner is "too good" and initial guess is zeros; - precond.solve(x) would always be zero. Setting x to random initially works. - There should be a more mathematical derivation on why and how for this though. - -used Eigen's QR decomposition to simplify MR part - -Added condition on L for small matrices (use L=length of x if L>length of x) - -Added fix if r0 becomes orthogonal to rShadow - -Added condition to stop computing if rho1 becomes nan/inf - -Added termination condition if residual is low enough during BiCG step. - -Removed fix if r0 becomes orthogonal to rShadow - -Added reliable updating strategy from enhanced bicgstabl paper - -Initial version of the convex combination from enhanced bicgstabl paper - -Added defines to switch between original, householder QR and convex combination methods for argmin (stab-step) step. - -Added define to switch between left and right preconditioning, note that right-preconditioning may not be compatible currently with reliable update step. - - - Right-preconditioning is applied here; Ax=b->AMinvu=b. - instead of solving for x, we solve for MInv*u - Then at the end x can be obtained via x=MInv*x - See Algorithm 9.5 from Iterative methods for sparse linear systems for inspiration. - - Known issues: - -See TODO's - -If L is too large (~20) instabilities occur. - -Residual gap can be significant (1e3 or more) if 1000+ iterations are needed. This is likely an algorithmic issue, not implementation issue. - In case of IDR(s)Stab(L) this is shown in the paper of Kensuke. - => To fix this, consider the variant of BiCGStab(L) given in Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving - linear systems of equations. Universiteit Utrecht. Mathematisch Instituut, 1996. This implementation of BiCGStab(L) is based on the papers General algorithm: @@ -44,42 +22,31 @@ #ifndef EIGEN_BICGSTABL_H #define EIGEN_BICGSTABL_H -#include #include #include #include -#include - -#define BICGSTABL_IN_LIBRARY 1 //1 is needed to have bicgstab(L) compile with Eigen, as part of the library for the unit tests. 0 to have it as a plugin for the test suite and in Plasimo. -//TODO: Figure out a better fix. - -#define BICGSTABL_REL_UPDATE 1 //0=no reliable update strategy, 1=reliable update strategy -#define BICGSTABL_ALGORITHM 0 //0: original, 1: Householder QR, 2: Convex polynomial method -#define BICGSTABL_PRECOND 1 //0: right, 1: left. 0 may be broken, 1 is checked against Fortrain implementation of Enhanced bicgstabl -/* - Technically it is possible to change these strategies at runtime, however this would also require the user (or some sort of automated criterion) to know why and when certain methods are best. - Currently this is done via defines to eliminate any possible contamination between the methods. -*/ -#define BICGSTABL_DEBUG_INFO 0 //Print info to console about the problem being solved. -#if BICGSTABL_DEBUG_INFO -#include -#endif namespace Eigen { namespace internal { - + /** \internal Low-level bi conjugate gradient stabilized algorithm with L additional residual minimization steps + * \param mat The matrix A + * \param rhs The right hand side vector b + * \param x On input and initial solution, on output the computed solution. + * \param precond A preconditioner being able to efficiently solve for an + * approximation of Ax=b (regardless of b) + * \param iters On input the max number of iteration, on output the number of performed iterations. + * \param tol_error On input the tolerance error, on output an estimation of the relative error. + * \param L On input Number of additional GMRES steps to take. If L is too large (~20) instabilities occur. + * \return false in the case of numerical issue, for example a break down of BiCGSTABL. + */ template - bool BiCGSTABL(const MatrixType &mat, const Rhs &rhs, Dest &x, + bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditioner &precond, Index &iters, typename Dest::RealScalar &tol_error, Index L) { -#if BICGSTABL_DEBUG_INFO - std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); - std::cout << "Matrix size: " << mat.rows() << std::endl; -#endif using std::abs; using std::sqrt; typedef typename Dest::RealScalar RealScalar; @@ -95,19 +62,11 @@ namespace Eigen typedef Matrix VectorType; typedef Matrix DenseMatrixType; -//We start with an initial guess x_0 and let us set r_0 as (residual calculated from x_0) -#if BICGSTABL_PRECOND == 0 - VectorType r0 = rhs - precond.solve(mat * x); //r_0 -#elif BICGSTABL_PRECOND == 1 + //We start with an initial guess x_0 and let us set r_0 as (residual calculated from x_0) VectorType r0 = precond.solve(rhs - mat * x); //r_0 -#endif - //rShadow is arbritary, but must not be orthogonal to r0. VectorType rShadow = r0; - //RealScalar rShadow_sqnorm = rShadow.squaredNorm(); - //RealScalar rhs_sqnorm = rhs.squaredNorm(); - VectorType x_prime = x; x.setZero(); VectorType b_prime = r0; @@ -117,74 +76,35 @@ namespace Eigen Scalar alpha = 0.0; Scalar omega = 1.0; - //RealScalar tol2 = tol * tol * rShadow_sqnorm; - //RealScalar eps2 = NumTraits::epsilon() * NumTraits::epsilon(); - DenseMatrixType rHat(N, L + 1); DenseMatrixType uHat(N, L + 1); rHat.col(0) = r0; uHat.col(0).fill(0.0); - //Index restarts = 0; bool bicg_convergence = false; - //RealScalar zeta0 = r0.norm(); RealScalar zeta0 = rhs.norm(); RealScalar zeta = zeta0; RealScalar Mx = zeta0; RealScalar Mr = zeta0; + //Criterion for when to apply the group-wise update, conform ref 3. const RealScalar delta = 0.01; bool compute_res = false; bool update_app = false; - //Bool to signal that a new rShadow was chosen and the main loop should be restarted. - //bool reset = false; - while (zeta > tol * zeta0 && k < maxIters) { - rho0 = -omega * rho0; + rho0 *= -omega; - for (Index j = 0; j <= L - 1; ++j) + for (Index j = 0; j < L; ++j) { Scalar rho1 = rShadow.dot(rHat.col(j)); - // if (abs(rho1) < eps2 * rShadow_sqnorm) - // { - // // The new residual vector became too orthogonal to the arbitrarily chosen direction rShadow - // // Let's restart with a new rShadow - // std::cout << "Orthogonality reset triggered" << std::endl; - - // if (k != 0) - // { - // //Choose a new rShadow based on the current solution - // r0 = precond.solve(rhs - mat * x); - // rShadow = r0; - // rho1 = rShadow_sqnorm = rShadow.squaredNorm(); - // rHat.col(0) = r0; - // uHat.col(0).fill(0.0); - // } - // else - // { - // //During the first iteration there is no reason to reset, as nothing would change. - // //Might as well try with a random rShadow instead, as it will be very unlikely to be orthogonal to any vector. - // rShadow.setRandom(); - // } - - // if (restarts++ == 0) - // { - // k = 0; - // } - - // reset = true; - // break; - // } - - if (rho1 - rho1 != 0.0 || rho0 == 0.0) + if ((numext::isnan)(rho1) || rho0 != 0.0) { - std::cout << "Internal BiCGStab(L) parameter became unfit to continue computing" << std::endl; tol_error = zeta / zeta0; return false; } @@ -193,33 +113,19 @@ namespace Eigen rho0 = rho1; //Update search directions - for (Index i = 0; i <= j; ++i) - { + uHat.leftCols(j) = rHat.leftCols(j) - beta * uHat.leftCols(j); - uHat.col(i) = rHat.col(i) - beta * uHat.col(i); - } - -#if BICGSTABL_PRECOND == 0 - uHat.col(j + 1) = mat * precond.solve(uHat.col(j)); -#elif BICGSTABL_PRECOND == 1 uHat.col(j + 1) = precond.solve(mat * uHat.col(j)); -#endif alpha = rho1 / (rShadow.dot(uHat.col(j + 1))); //Update residuals - for (Index i = 0; i <= j; ++i) - { - rHat.col(i) = rHat.col(i) - alpha * uHat.col(i + 1); - } + //TODO check this block + rHat.leftCols(j) -= alpha * uHat.block(0, 1, N, j); -#if BICGSTABL_PRECOND == 0 - rHat.col(j + 1) = mat * precond.solve(rHat.col(j)); -#elif BICGSTABL_PRECOND == 1 rHat.col(j + 1) = precond.solve(mat * rHat.col(j)); -#endif //Complete BiCG iteration by updating x - x = x + alpha * uHat.col(0); + x += alpha * uHat.col(0); //Check for early exit zeta = rHat.col(0).norm(); @@ -236,218 +142,25 @@ namespace Eigen } } -#if BICGSTABL_ALGORITHM == 0 - if (bicg_convergence == false) { - DenseMatrixType tau(L + 1, L + 1); - VectorType sigma(L + 1); - VectorType gamma(L + 1); - VectorType gammaP(L + 1); - VectorType gammaPP(L); - /* - //TODO: Currently the first elements of tau,gamma, gammaP and gammaPP are never used - these do take up (a small amount of) memory. Changing indices to 0-based leads to deviations in notation with the Bicgstabl paper - and may be confusing. - */ - - for (Index j = 1; j <= L; ++j) - { - for (Index i = 1; i <= j - 1; ++i) - { - tau(i, j) = (rHat.col(i).dot(rHat.col(j))) / sigma(i); - rHat.col(j) -= tau(i, j) * rHat.col(i); - } - - sigma(j) = rHat.col(j).squaredNorm(); - gammaP(j) = (rHat.col(j).dot(rHat.col(0))) / sigma(j); - } - - gamma(L) = gammaP(L); - omega = gamma(L); - - //TODO: store tau as triangular, perform backsubstitution with solve - //instead of explicit summation. - for (Index j = L - 1; j >= 1; --j) - { - Scalar sum = 0.0; - - for (Index i = j + 1; i <= L; ++i) - { - sum += tau(j, i) * gamma(i); - } - - gamma(j) = gammaP(j) - sum; - } - - //TODO: See if this can be done without nested loops and explicit sum - for (Index j = 1; j <= L - 1; ++j) - { - Scalar sum = 0.0; - - for (Index i = j + 1; i <= L - 1; ++i) - { - sum += tau(j, i) * gamma(i + 1); - } - - gammaPP(j) = gamma(j + 1) + sum; - } - - x += gamma(1) * rHat.col(0); - rHat.col(0) -= gammaP(L) * rHat.col(L); - uHat.col(0) -= gamma(L) * uHat.col(L); - - x += rHat.block(0, 1, N, L - 1) * gammaPP.segment(1, L - 1); - uHat.col(0) -= uHat.block(0, 1, N, L - 1) * gamma.segment(1, L - 1); - rHat.col(0) -= rHat.block(0, 1, N, L - 1) * gammaP.segment(1, L - 1); - } - -#elif BICGSTABL_ALGORITHM == 1 - - if (bicg_convergence == false) //(L == 1) - { - /* - //TODO: Compare this with the Fortran code step by step in the Enhanced bicgstabL paper, before any hard conclusions are drawn about this section. - */ /* The polynomial/minimize residual step. QR Householder method for argmin is more stable than (modified) Gram-Schmidt, in the sense that there is less loss of orthogonality. It is more accurate than solving the normal equations, since the normal equations scale with condition number squared. - - This method can also be used if L>1, however the paper of Fokkema recommends the convex combination method to maintain convergence. */ - VectorType gamma(L); - // gamma = ((rHat.block(0, 1, N, L)).adjoint() * (rHat.block(0, 1, N, - // L))).llt().solve((rHat.block(0, 1, N, L)).adjoint() * rHat.col(0)); - - // DenseMatrixType Z(L, L); - - // for (Index i = 0; i < L; ++i) - // { - // for (Index j = 0; j <= i; ++j) - // { - // Z(i, j) = rHat.col(i + 1).dot(rHat.col(j + 1)); - - // } - - // } - - //Z = (Z.template selfadjointView()); - - //gamma = Z.llt().solve(Z.col(0)); - //gamma = Z.llt().solve(rHat.block(0, 1, N, L).adjoint() * rHat.col(0)); - gamma = (rHat.block(0, 1, N, L)).householderQr().solve(rHat.col(0)); - x += rHat.block(0, 0, N, L) * gamma; - rHat.col(0) -= rHat.block(0, 1, N, L) * gamma; - uHat.col(0) -= uHat.block(0, 1, N, L) * gamma; + VectorType gamma = (rHat.rightCols(L)).householderQr().solve(rHat.col(0)); + x += rHat.leftCols(L) * gamma; + rHat.col(0) -= rHat.rightCols(L) * gamma; + uHat.col(0) -= uHat.rightCols(L) * gamma; zeta = rHat.col(0).norm(); omega = gamma(L - 1); } -#elif BICGSTABL_ALGORITHM == 2 - - if (L != 1 && bicg_convergence == false) //else - { - //TODO: ADD case for L==1, refer to householder method, or analyitical bicgstab method - /* - Convex combination step - - This takes a combination of Minimum and Orthogonal residual polynomials, - resulting in a more stable determination of BiCG coefficients [Fokkema]. - */ - DenseMatrixType Z(L + 1, L + 1); - - //Z = rHat.adjoint() * rHat; - for (Index i = 0; i <= L; ++i) - { - for (Index j = 0; j <= i; ++j) - { - Z(i, j) = rHat.col(i).dot(rHat.col(j)); - } - } - - // //Z is Hermitian, therefore only one half has to be computed, the other half can be filled in, saving several DOTs. - Z = (Z.template selfadjointView()); - DenseMatrixType Z0 = Z.block(1, 1, L - 1, - L - 1); //Copy to ensure there is no in place decomposition taking place - //Z = (rHat.block(0, 1, N, L)).transpose() * rHat.block(0, 1, N, L); - //Z = rHat.adjoint() * rHat; - //TODO: strictly upper/lower doesnt work, so this is performing some unneeded work. - - //Determine the coefficients for the polynomials. - VectorType y0(L + 1); - VectorType yL(L + 1); - y0(0) = -1.0; - y0(L) = 0.0; - yL(0) = 0.0; - yL(L) = -1.0; - - //The block of Z is used for both y0 and yL, so the decomposition can be recycled. - //PartialPivLU lu_decomposition(Z.block(1, 1, L - 1, L - 1)); - LLT lu_decomposition(Z0); - y0.block(1, 0, L - 1, 1) = lu_decomposition.solve(Z.block(1, 0, L - 1, 1)); - yL.block(1, 0, L - 1, 1) = lu_decomposition.solve(Z.block(1, L, L - 1, 1)); - - //y0.block(1, 0, L - 1, 1) = - //yL.block(1, 0, L - 1, 1) = //Dit toch gwn met householder doen ipv normal eqns - - Scalar kappa0 = y0.adjoint() * (Z * y0); - Scalar kappaL = yL.adjoint() * (Z * yL); - kappa0 = sqrt(kappa0); - kappaL = sqrt(kappaL); - - //Combine the MR and OR versions. - Scalar rho = yL.adjoint() * (Z * y0); - rho = rho / (kappa0 * kappaL); - RealScalar abs_rho = abs(rho); - Scalar gamma_hat = (rho / abs_rho) * (abs_rho > 0.7 ? abs_rho : 0.7); - y0 = y0 - gamma_hat * (kappa0 / kappaL) * yL; - - //The norm of the residual can computed be more efficiently from y0 and Z than from rHat.col(0).norm(). - rho = y0.adjoint() * (Z * y0); - zeta = sqrt(abs(rho)); - - //if (bicg_convergence && zeta - zeta != 0) - if (zeta - zeta != 0) - { -/* - Convergence was achieved during BiCG step, this leads to extremely small values of rHat, and Z. - As a result the polynomial step can break down, however x is likely to still be usable. - */ -#if BICGSTABL_DEBUG_INFO - std::cout << "zeta breakdown" << std::endl; - //std::cout << "rHat: \n" << rHat << std::endl; - //std::cout << "x: \n" << x << std::endl; - //std::cout << "uHat: \n" << uHat << std::endl; - std::cout << "y0: \n" - << y0 << std::endl; - std::cout << "yL: \n" - << yL << std::endl; - std::cout << "Z: \n" - << Z << std::endl; - std::cout << "zeta: \n" - << zeta << std::endl; -#endif - zeta = rHat.col(0).norm(); - break; - } - else - { - //Update - omega = y0(L); - uHat.col(0) -= uHat.block(0, 1, N, L) * y0.block(1, 0, L, 1); - x += rHat.block(0, 0, N, L) * y0.block(1, 0, L, 1); - rHat.col(0) -= rHat.block(0, 1, N, L) * y0.block(1, 0, L, 1); - } - } - -#endif - //TODO: Duplicate update code can be removed for the L=1 and L!=1 case. //TODO: Use analytical expression instead of householder for L=1. - k += L; - //k += 1; + k++; /* Reliable update part @@ -458,7 +171,6 @@ namespace Eigen Furthermore a "group wise update" strategy is used to combine updates, which improves accuracy. */ -#if BICGSTABL_REL_UPDATE Mx = std::max(Mx, zeta); //Maximum norm of residuals since last update of x. Mr = std::max(Mr, zeta); //Maximum norm of residuals since last computation of the true residual. @@ -481,23 +193,16 @@ namespace Eigen if (compute_res) { -#if BICGSTABL_PRECOND == 0 - rHat.col(0) = b_prime - precond.solve(mat * x); -#elif BICGSTABL_PRECOND == 1 //rHat.col(0) = b_prime - mat * x; //Fokkema paper pseudocode //Fokkema paper Fortan code L250-254 - rHat.col(0) = mat * x; - rHat.col(0) = precond.solve(rHat.col(0)); - rHat.col(0) = b_prime - rHat.col(0); -#endif - + rHat.col(0) = b_prime - precond.solve(mat * x); zeta = rHat.col(0).norm(); Mr = zeta; if (update_app) { //After the group wise update, the original problem is translated to a shifted one. - x_prime = x_prime + x; + x_prime += x; x.setZero(); b_prime = rHat.col(0); Mx = zeta; @@ -506,46 +211,27 @@ namespace Eigen compute_res = false; update_app = false; -#endif -#if BICGSTABL_DEBUG_INFO - std::cout << "k: " << k << "res:" << zeta / zeta0 << std::endl; -#endif } //Convert internal variable to the true solution vector x - x = x_prime + x; -#if BICGSTABL_PRECOND == 0 - x = precond.solve(x); -#endif + x += x_prime; tol_error = zeta / zeta0; - //tol_error = zeta; - //tol_error = sqrt(rHat.col(0).squaredNorm() / rhs_sqnorm); - //tol_error = (mat * x - rhs).norm() / rhs.norm(); iters = k; -#if BICGSTABL_DEBUG_INFO - //Print experimental info - std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); - std::chrono::duration time_span = std::chrono::duration_cast>(t2 - t1); - std::cout << "Solver time: " << time_span.count() << " seconds" << std::endl; - std::cout << "#iterations: " << k << std::endl; - std::cout << "Estimated error: " << tol_error << std::endl; - std::cout << "True error: " << (mat * x - rhs).norm() / rhs.norm(); -#endif return true; } } // namespace internal - template < typename _MatrixType, - typename _Preconditioner = DiagonalPreconditioner > + template > class BiCGSTABL; namespace internal { template - struct traits > + struct traits> { typedef _MatrixType MatrixType; typedef _Preconditioner Preconditioner; @@ -554,7 +240,7 @@ namespace Eigen } // namespace internal template - class BiCGSTABL : public IterativeSolverBase > + class BiCGSTABL : public IterativeSolverBase> { typedef IterativeSolverBase Base; using Base::m_error; @@ -562,7 +248,7 @@ namespace Eigen using Base::m_isInitialized; using Base::m_iterations; using Base::matrix; - Index m_L = 2; + Index m_L; public: typedef _MatrixType MatrixType; @@ -572,7 +258,7 @@ namespace Eigen public: /** Default constructor. */ - BiCGSTABL() : Base() {} + BiCGSTABL() : Base() { m_L = 2; } /** Initialize the solver with matrix \a A for further \c Ax=b solving. @@ -585,77 +271,30 @@ namespace Eigen matrix A, or modify a copy of A. */ template - explicit BiCGSTABL(const EigenBase &A) : Base(A.derived()) {} + explicit BiCGSTABL(const EigenBase &A) : Base(A.derived()) { m_L = 2; } ~BiCGSTABL() {} -/** \internal */ -/** Loops over the number of columns of b and does the following: + /** \internal */ + /** Loops over the number of columns of b and does the following: 1. sets the tolerence and maxIterations 2. Calls the function that has the core solver routine */ -#if BICGSTABL_IN_LIBRARY == 0 - template - void _solve_with_guess_impl(const Rhs &b, Dest &x) const - { - _solve_vector_with_guess_impl(b, x); - } -#endif - template void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const { m_iterations = Base::maxIterations(); m_error = Base::m_tolerance; - bool ret = internal::BiCGSTABL(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, - m_L); -#if BICGSTABL_DEBUG_INFO - std::cout << "ret: " << ret << std::endl; - std::cout << "m_error: " << m_error << std::endl; - std::cout << "Base::m_tolerance: " << Base::m_tolerance << std::endl; -#endif + bool ret = internal::bicgstabl(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_L); - if (ret == false) - { - m_info = NumericalIssue; - x.setZero(); //x=nan does not pass Eigen's tests even if m_info=NumericalIssue :) - m_error = ((std::isfinite)(m_error) ? m_error : 1.0); - } - else - { - m_info = (m_error <= Base::m_tolerance) ? Success - : NoConvergence; - } - -// m_info = (!ret) ? NumericalIssue -// : m_error <= Base::m_tolerance ? Success -// : NoConvergence; -//m_info=NumericalIssue; -#if BICGSTABL_DEBUG_INFO - std::cout << "m_error_returned: " << m_error << std::endl; - std::cout << "m_info: " << m_info << std::endl; -#endif - // m_info = (!ret) ? NumericalIssue - // : m_error <= Base::m_tolerance ? Success - // : NoConvergence; - m_isInitialized = true; + m_info = (!ret) ? NumericalIssue + : m_error <= Base::m_tolerance ? Success + : NoConvergence; } -/** \internal */ -/** Resizes the x vector to match the dimenstion of b and sets the elements to zero*/ -#if BICGSTABL_IN_LIBRARY == 0 - using Base::_solve_impl; - template - void _solve_impl(const MatrixBase &b, Dest &x) const - { - - x.resize(this->rows(), b.cols()); - x.setZero(); - - _solve_with_guess_impl(b, x); - } -#endif + /** \internal */ + /** Sets the parameter L, indicating the amount of minimize residual steps are used. */ void setL(Index L) { if (L < 1) @@ -667,6 +306,7 @@ namespace Eigen } protected: + }; } // namespace Eigen -- GitLab From 3778bec395b6b6b6eac03a1d79de05b5bc47e3ff Mon Sep 17 00:00:00 2001 From: jwehner Date: Fri, 15 May 2020 16:44:06 +0200 Subject: [PATCH 06/18] cleanup idrstab --- Eigen/src/IterativeLinearSolvers/IDRStab.h | 687 +++++---------------- 1 file changed, 156 insertions(+), 531 deletions(-) diff --git a/Eigen/src/IterativeLinearSolvers/IDRStab.h b/Eigen/src/IterativeLinearSolvers/IDRStab.h index cf137ae8b..9e9a21b22 100755 --- a/Eigen/src/IterativeLinearSolvers/IDRStab.h +++ b/Eigen/src/IterativeLinearSolvers/IDRStab.h @@ -1,5 +1,16 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2020 Chris Schoutrop +// Copyright (C) 2020 Mischa Senders +// Copyright (C) 2020 Lex Kuijpers +// Copyright (C) 2020 Jens Wehner +// Copyright (C) 2020 Adithya Vijaykumar +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. /* - C.E.M. Schoutrop The IDR(S)Stab(L) method is a combination of IDR(S) and BiCGStab(L) //TODO: elaborate what this improves over BiCGStab here @@ -14,8 +25,6 @@ 4. Sonneveld, P., & Van Gijzen, M. B. (2009). IDR(s): A Family of Simple and Fast Algorithms for Solving Large Nonsymmetric Systems of Linear Equations. SIAM Journal on Scientific Computing, 31(2), 1035-1062. doi:10.1137/070685804 5. Sonneveld, P. (2012). On the convergence behavior of IDR (s) and related methods. SIAM Journal on Scientific Computing, 34(5), A2576-A2598. - Special acknowledgement to Mischa Senders for his work on an initial reference implementation of this algorithm in MATLAB, to Lex Kuijpers for testing the IDRStab implementation and to Adithya Vijaykumar for providing the framework for this solver. - Right-preconditioning based on Ref. 3 is implemented here. */ @@ -25,60 +34,36 @@ #include #include #include -#include - -//TODO: Remove the debug info, and obsolete options in final version. -#define IDRSTAB_DEBUG_INFO 0 //Print info to console about the problem being solved. -#define SAVE_FAILS 0 //Save matrices that didn't converge -#define IDRSTAB_ACCURATE true //true: Accurate version to pass unit tests, false: time-optimized version that works for probably every reasonable case. -#define IDRSTAB_INF_NORM false //true: Use the faster, slightly more strict and not normally used, infinity norm where possible. False: use the standard 2-norm. - -#if SAVE_FAILS -#include -#include "eigen3/unsupported/Eigen/src/SparseExtra/MarketIO.h" -#endif -#if IDRSTAB_DEBUG_INFO > 0 -#include -#endif + namespace Eigen { namespace internal { - template - bool idrstab(const MatrixType& mat, const Rhs& rhs, Dest& x, - const Preconditioner& precond, Index& iters, - typename Dest::RealScalar& tol_error, Index L, Index S) + template + bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, + const Preconditioner &precond, Index &iters, + typename Dest::RealScalar &tol_error, Index L, Index S) { /* Setup and type definitions. */ + using std::abs; + using std::sqrt; typedef typename Dest::Scalar Scalar; typedef typename Dest::RealScalar RealScalar; typedef Matrix VectorType; typedef Matrix DenseMatrixTypeCol; typedef Matrix DenseMatrixTypeRow; - #if IDRSTAB_DEBUG_INFO >0 - std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now(); - std::cout << "Matrix size: " << mat.rows() << std::endl; - std::cout << mat.IsRowMajor << std::endl; - //std::cout << "rhs\n" << rhs << std::endl; - //std::cout << "A\n" << mat << std::endl; - #endif const Index N = x.rows(); Index k = 0; //Iteration counter const Index maxIters = iters; - //PO: sqrNorm() saves 1 sqrt calculation - #if IDRSTAB_INF_NORM - const RealScalar rhs_norm = rhs.template lpNorm(); - #else const RealScalar rhs_norm = rhs.norm(); - #endif const RealScalar tol2 = tol_error * rhs_norm; if (rhs_norm == 0) @@ -90,36 +75,13 @@ namespace Eigen /* exit */ - #if IDRSTAB_DEBUG_INFO >1 - std::cout << "rhs_norm==0 exit" << std::endl; - #endif x.setZero(); tol_error = 0.0; return true; } //Construct decomposition objects beforehand. - #if IDRSTAB_ACCURATE ColPivHouseholderQR qr_solver; FullPivLU lu_solver; - #else - HouseholderQR qr_solver; - PartialPivLU lu_solver; - #endif - /* - Solving small S x S systems: - From numerical experiment is follows that in some cases (when the tolerance is really too low) inverting sigma is delicate, - since the resulting alpha will be close to zero. If this is not done accurately enough the algorithm breaks down. - Based on the benchmark data from: https://eigen.tuxfamily.org/dox/group__DenseDecompositionBenchmark.html - it is concluded that for the small (SxS) systems the difference between PartialPivLU and FullPivLU is negligible. - HouseholderQR methods can be used for these systems as well, however it is about 2x as slow. - - Solving least squares problem in the polynomial step: - ColPivHouseholderQR is sufficiently accurate, based on the benchmark data this is about 2x as fast compared to FillPivHouseholderQR. - - Resulting strategy: - -Use the more accurate, but slower FullPivLU version for small systems - -Use ColPivHouseholderQR for the minimal residual step. - */ if (S >= N || L >= N) { @@ -130,16 +92,9 @@ namespace Eigen /* Exit */ - #if IDRSTAB_DEBUG_INFO >1 - std::cout << "Trivial matrix exit" << std::endl; - #endif lu_solver.compute(DenseMatrixTypeRow(mat)); x = lu_solver.solve(rhs); - #if IDRSTAB_INF_NORM - tol_error = (rhs - mat * x).template lpNorm() / rhs_norm; - #else tol_error = (rhs - mat * x).norm() / rhs_norm; - #endif return true; } @@ -147,7 +102,7 @@ namespace Eigen VectorType u(N * (L + 1)); VectorType r(N * (L + 1)); DenseMatrixTypeCol V(N * (L + 1), S); - DenseMatrixTypeCol U(N * (L + 1), S); + DenseMatrixTypeCol rHat(N, L + 1); VectorType alpha(S); @@ -159,11 +114,7 @@ namespace Eigen */ //Set up the initial residual r.head(N) = rhs - mat * precond.solve(x); - #if IDRSTAB_INF_NORM - tol_error = r.head(N).template lpNorm(); - #else tol_error = r.head(N).norm(); - #endif DenseMatrixTypeRow h_FOM(S, S - 1); h_FOM.setZero(); @@ -171,12 +122,13 @@ namespace Eigen /* Determine an initial U matrix of size N x S */ - for (Index q = 0; q < S; ++q) + + DenseMatrixTypeCol U(N * (L + 1), S); + for (Index col_index = 0; col_index < S; ++col_index) { - //By default S=4, q!=0 case is more likely, this ordering is better for branch prediction. //Arnoldi-like process to generate a set of orthogonal vectors spanning {u,A*u,A*A*u,...,A^(S-1)*u}. //This construction can be combined with the Full Orthogonalization Method (FOM) from Ref.3 to provide a possible early exit with no additional MV. - if (q != 0) + if (col_index != 0) { /* Original Gram-Schmidt orthogonalization strategy from Ref. 1: @@ -191,21 +143,21 @@ namespace Eigen http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1610 */ VectorType w = mat * precond.solve(u.head(N)); - for (Index i = 0; i < q; ++i) + for (Index i = 0; i < col_index; ++i) { //"Normalization factor" (v is normalized already) - VectorType v = U.block(0, i, N, 1); + auto v = U.col(i).head(N); //"How much do v and w have in common?" - h_FOM(i, q - 1) = v.dot(w); + h_FOM(i, col_index - 1) = v.dot(w); //"Subtract the part they have in common" - w = w - h_FOM(i, q - 1) * v; + w -= h_FOM(i, col_index - 1) * v; } u.head(N) = w; - h_FOM(q, q - 1) = u.head(N).norm(); + h_FOM(col_index, col_index - 1) = u.head(N).norm(); - if (std::abs(h_FOM(q, q - 1)) != 0.0) + if (abs(h_FOM(col_index, col_index - 1)) != 0.0) { /* This only happens if u is NOT exactly zero. In case it is exactly zero @@ -224,53 +176,33 @@ namespace Eigen Contrary to what one would suspect, the comparison with ==0.0 for floating-point types is intended here. Any arbritary non-zero u is fine to continue, however if u contains either NaN or Inf the algorithm will break down. */ - u.head(N) /= h_FOM(q, q - 1); + u.head(N) /= h_FOM(col_index, col_index - 1); } - } else { u.head(N) = r.head(N); - u.head(N) /= u.head(N).norm(); + u.head(N).normalize(); } - U.block(0, q, N, 1) = u.head(N); + U.col(col_index).head(N) = u.head(N); } + if (S > 1) { /* Check for early FOM exit. */ - //Scalar beta = tol_error; //r.head(N).norm(): This is expected to be tol_error at this point! - Scalar beta = r.head(N).norm(); //This must be the actual length of the vector, not the infinity norm estimate. + Scalar beta = r.head(N).norm(); VectorType e1 = VectorType::Zero(S - 1); e1(0) = beta; lu_solver.compute(h_FOM.topLeftCorner(S - 1, S - 1)); VectorType y = lu_solver.solve(e1); - VectorType x2 = x + U.block(0, 0, N, S - 1) * y; + VectorType x2 = x + U.topLeftCorner(N, S - 1) * y; //Using proposition 6.7 in Saad, one MV can be saved to calculate the residual - #if IDRSTAB_INF_NORM - RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.block(0, S - 1, N, 1)).template lpNorm(); - #else - RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.block(0, S - 1, N, 1)).norm(); - #endif - - #if IDRSTAB_DEBUG_INFO >1 - std::cout << "h_FOM\n" << h_FOM << std::endl; - std::cout << "h_FOM(S-1, S - 2)\n" << h_FOM(S - 1, S - 2) << std::endl; - std::cout << "y(S-1)\n" << y(S - 2) << std::endl; - std::cout << "y\n" << y << std::endl; - #if IDRSTAB_INF_NORM - std::cout << "U.block(0,S-1,N,1).template lpNorm()\n" << U.block(0, S - 1, N, 1).template lpNorm() << std::endl; - RealScalar FOM_actual_res = (rhs - mat * precond.solve(x2)).template lpNorm() / rhs_norm; - #else - std::cout << "U.block(0,S-1,N,1).norm()\n" << U.block(0, S - 1, N, 1).norm() << std::endl; - RealScalar FOM_actual_res = (rhs - mat * precond.solve(x2)).norm() / rhs_norm; - #endif - std::cout << "FOM actual residual\n" << FOM_actual_res << std::endl; - std::cout << "FOM estimated residual from Saad method\n " << FOM_residual / rhs_norm << std::endl; - #endif + RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.col(S - 1).head(N)).norm(); + if (FOM_residual < tol2) { /* @@ -279,29 +211,10 @@ namespace Eigen iters = k; x = precond.solve(x2); tol_error = FOM_residual / rhs_norm; - #if IDRSTAB_DEBUG_INFO >0 - std::cout << "Speculative FOM exit" << std::endl; - std::cout << "Estimated relative residual: " << tol_error << std::endl; - #if IDRSTAB_INF_NORM - std::cout << "True relative residual: " << (mat * x - rhs).template lpNorm() / rhs.template lpNorm() << - std::endl; - #else - std::cout << "True relative residual: " << (mat * x - rhs).norm() / rhs.norm() << std::endl; - #endif - - #endif return true; } } - #if IDRSTAB_DEBUG_INFO >1 - //Columns of U should be orthonormal - std::cout << "Check orthonormality U\n" << - U.block(0, 0, N, S).adjoint()*U.block(0, 0, N, S) << std::endl; - //h_FOM should not contain any NaNs - std::cout << "h_FOM:\n" << h_FOM << std::endl; - #endif - /* Select an initial (N x S) matrix R0. 1. Generate random R0, orthonormalize the result. @@ -317,28 +230,22 @@ namespace Eigen //Original IDRStab and Kensuke choose S random vectors: HouseholderQR qr(DenseMatrixTypeCol::Random(N, S)); DenseMatrixTypeRow R_T = (qr.householderQ() * DenseMatrixTypeCol::Identity(N, - S)).adjoint(); + S)) + .adjoint(); DenseMatrixTypeRow AR_T = DenseMatrixTypeRow(R_T * mat); - #if IDRSTAB_DEBUG_INFO >1 - std::cout << "Check orthonormality R_T\n" << - R_T* R_T.adjoint() << std::endl; - #endif - //Pre-allocate sigma, this space will be recycled without additional allocations. DenseMatrixTypeCol sigma(S, S); - Index rt_counter = k; //Iteration at which R_T was reset last - bool reset_while = false; //Should the while loop be reset for some reason? + Index rt_counter = k; //Iteration at which R_T was reset last + bool reset_while = false; //Should the while loop be reset for some reason? - #if IDRSTAB_ACCURATE - VectorType Q(S, 1); //Vector containing the row-scaling applied to sigma - VectorType P(S, 1); //Vector containing the column-scaling applied to sigma - DenseMatrixTypeCol QAP(S, S); //Scaled sigma + VectorType Q(S, 1); //Vector containing the row-scaling applied to sigma + VectorType P(S, 1); //Vector containing the column-scaling applied to sigma + DenseMatrixTypeCol QAP(S, S); //Scaled sigma bool repair_flag = false; RealScalar residual_0 = tol_error; //bool apply_r_exit = false; - #endif while (k < maxIters) { @@ -361,50 +268,36 @@ namespace Eigen /* Suspected is that sigma could be badly scaled, since causes alpha~=0, but the update vector is not zero. To improve stability we scale with absolute row and col sums first. - Sigma can become badly scaled (but still well-conditioned) for the ASML matrices, where sigma~=0 happens. + Sigma can become badly scaled (but still well-conditioned). A bad sigma also happens if R_T is not chosen properly, for example if R_T is zeros sigma would be zeros as well. The effect of this is a left-right preconditioner, instead of solving Ax=b, we solve - Q*A*P*inv(P)*b=Q*b. + Q*A*P*inv(P)*x=Q*b. */ - #if IDRSTAB_ACCURATE - Q = (sigma.cwiseAbs().rowwise().sum()).cwiseInverse(); //Calculate absolute inverse row sum - QAP = Q.asDiagonal() * sigma; //Scale with inverse absolute row sums - P = (QAP.cwiseAbs().colwise().sum()).cwiseInverse(); //Calculate absolute inverse column sum - QAP = QAP * P.asDiagonal(); //Scale with inverse absolute column sums + + Q = (sigma.cwiseAbs().rowwise().sum()).cwiseInverse(); //Calculate absolute inverse row sum + QAP = Q.asDiagonal() * sigma; //Scale with inverse absolute row sums + P = (QAP.cwiseAbs().colwise().sum()).cwiseInverse(); //Calculate absolute inverse column sum + QAP = QAP * P.asDiagonal(); //Scale with inverse absolute column sums lu_solver.compute(QAP); - #else - lu_solver.compute(sigma); - #endif //Obtain the update coefficients alpha - if (j != 1) + if (j == 1) { - //alpha=inverse(sigma)*(AR_T*r_{j-2}) - #if IDRSTAB_ACCURATE - alpha.noalias() = lu_solver.solve(Q.asDiagonal() * AR_T * precond.solve(r.segment(N * (j - 2), N))); - #else - alpha.noalias() = lu_solver.solve(AR_T * precond.solve(r.segment(N * (j - 2), N))); - #endif + //alpha=inverse(sigma)*(R_T*r_0); + alpha.noalias() = lu_solver.solve(Q.asDiagonal() * R_T * r.head(N)); } else { - //alpha=inverse(sigma)*(R_T*r_0); - #if IDRSTAB_ACCURATE - alpha.noalias() = lu_solver.solve(Q.asDiagonal() * R_T * r.head(N)); - #else - alpha.noalias() = lu_solver.solve(R_T * r.head(N)); - #endif + //alpha=inverse(sigma)*(AR_T*r_{j-2}) + alpha.noalias() = lu_solver.solve(Q.asDiagonal() * AR_T * precond.solve(r.segment(N * (j - 2), N))); } //Unscale the solution - #if IDRSTAB_ACCURATE alpha = P.asDiagonal() * alpha; - #endif - //TODO: Check if another badly scaled scenario exists double old_res = tol_error; //Obtain new solution and residual from this update update.noalias() = U.topRows(N) * alpha; - r.head(N) -= mat * precond.solve(update); + r.head(N) -= mat * precond.solve(update); x += update; for (Index i = 1; i <= j - 2; ++i) @@ -417,74 +310,30 @@ namespace Eigen //r=[r;A*r_{j-2}] r.segment(Nj_min_1, N).noalias() = mat * precond.solve(r.segment(N * (j - 2), N)); } - #if IDRSTAB_DEBUG_INFO >1 - #if IDRSTAB_INF_NORM - std::cout << "r.head(N).template lpNorm()380: " << r.head(N).template lpNorm() << std::endl; - std::cout << "update.norm()" << update.template lpNorm() << std::endl; - std::cout << "update.norm()/old_res" << update.template lpNorm() / old_res << std::endl; - #else - std::cout << "r.head(N).norm()380: " << r.head(N).norm() << std::endl; - std::cout << "update.norm()" << update.norm() << std::endl; - std::cout << "update.norm()/old_res" << update.norm() / old_res << std::endl; - #endif - std::cout << "QAP\n" << QAP << std::endl; - std::cout << "sigma\n" << sigma << std::endl; - std::cout << "alpha380: " << alpha << std::endl; - - //If alpha380 is the zero vector, this can be caused by R0 being no good. - //It turns out that the update can be significant, even if alpha~=0, - //this suggests sigma can just be really badly scaled - //This was also found in the early version by Mischa, but thought it was resolved. - //If R_T=identity, then sigma is all zeros. - //A "bad" sigma can be obtained by choosing: - //R_T = DenseMatrixTypeRow::Identity(S, N); - //R_T = R_T+1e-12*DenseMatrixTypeRow::Random(S, N); - #endif - #if IDRSTAB_DEBUG_INFO >2 - VectorType x_unscaled; - VectorType x_scaled; - VectorType b = VectorType::Random(S, 1); - x_unscaled = sigma.fullPivLu().solve(b); - x_scaled = P.asDiagonal() * QAP.fullPivLu().solve(Q.asDiagonal() * b); - std::cout << "x_unscaled\n" << x_unscaled << std::endl; - std::cout << "x_scaled\n" << x_scaled << std::endl; - #endif - #if IDRSTAB_INF_NORM - tol_error = r.head(N).template lpNorm(); - #else tol_error = r.head(N).norm(); - #endif + if (tol_error < tol2) { //If at this point the algorithm has converged, exit. reset_while = true; break; } - #if IDRSTAB_ACCURATE - if (repair_flag==false && tol_error > 10 * residual_0) + + if (repair_flag == false && tol_error > 10 * residual_0) { //Sonneveld's repair flag suggestion from [5] //This massively reduces problems with false residual estimates (if they'd occur) repair_flag = true; - #if IDRSTAB_DEBUG_INFO > 0 - std::cout << "repair flag set true, iter: " << k << std::endl; - #endif } if (repair_flag && 1000 * tol_error < residual_0) { + //1000 comes from Sonneveld's repair flag suggestion from [5] r.head(N) = rhs - mat * precond.solve(x); repair_flag = false; - #if IDRSTAB_DEBUG_INFO > 0 - std::cout << "repair flag reset iter: " << k << std::endl; - #endif } - #endif + bool reset_R_T = false; - #if IDRSTAB_INF_NORM - if (alpha.template lpNorm() * rhs_norm < S * NumTraits::epsilon() * old_res) - #else if (alpha.norm() * rhs_norm < S * NumTraits::epsilon() * old_res) - #endif { //This would indicate the update computed by alpha did nothing much to decrease the residual //apparantly we've also not converged either. @@ -499,9 +348,6 @@ namespace Eigen /* Only regenerate if it didn't already happen this iteration. */ - #if IDRSTAB_DEBUG_INFO >1 - std::cout << "Generating new R_T" << std::endl; - #endif //Choose new R0 and try again qr.compute(DenseMatrixTypeCol::Random(N, S)); R_T = (qr.householderQ() * DenseMatrixTypeCol::Identity(N, S)).transpose(); //.adjoint() vs .transpose() makes no difference, R_T is random anyways. @@ -510,33 +356,29 @@ namespace Eigen This results in AR_T, which can be precomputed. */ AR_T = DenseMatrixTypeRow(R_T * mat); - j = 0; + j = 0; //WARNING reset the for loop counter rt_counter = k; continue; - //reset_while = true; - //break; } } - bool break_normalization=false; + bool break_normalization = false; for (Index q = 1; q <= S; ++q) { - if (q != 1) + if (q == 1) { - //u=[u_1;u_2;...;u_j] - u.head(Nj) = u.segment(N, Nj); + //u = r; + u.head(Nj_plus_1) = r.topRows(Nj_plus_1); } else { - //u = r; - u.head(Nj_plus_1) = r.topRows(Nj_plus_1); + //u=[u_1;u_2;...;u_j] + u.head(Nj) = u.segment(N, Nj); } //Obtain the update coefficients beta implicitly //beta=lu_sigma.solve(AR_T * u.block(Nj_min_1, 0, N, 1) - #if IDRSTAB_ACCURATE - u.head(Nj) -= U.topRows(Nj) * P.asDiagonal() * lu_solver.solve(Q.asDiagonal() * AR_T * precond.solve(u.segment(Nj_min_1, N))); - #else - u.head(Nj) -= U.topRows(Nj) * lu_solver.solve(AR_T * precond.solve(u.segment(Nj_min_1, N))); - #endif + + u.head(Nj) -= U.topRows(Nj) * P.asDiagonal() * lu_solver.solve(Q.asDiagonal() * AR_T * precond.solve(u.segment(Nj_min_1, N))); + //u=[u;Au_{j-1}] u.segment(Nj, N).noalias() = mat * precond.solve(u.segment(Nj_min_1, N)); @@ -544,33 +386,24 @@ namespace Eigen if (q > 1) { /* - Original Gram-Schmidt-like procedure to make u orthogonal to the columns of V from Ref. 1. + Modified Gram-Schmidt-like procedure to make u orthogonal to the columns of V from Ref. 1. The vector mu from Ref. 1 is obtained implicitly: mu=V.block(Nj, 0, N, q - 1).adjoint() * u.block(Nj, 0, N, 1). */ - #if IDRSTAB_ACCURATE==false - u.head(Nj_plus_1) -= V.topLeftCorner(Nj_plus_1, q - 1) * (V.block(Nj, 0, N, q - 1).adjoint() * u.segment(Nj, N)); - #else - /* - The same, but using MGS instead of GS - */ - DenseMatrixTypeCol h(1, 1); //Eventhough h should be Scalar, casting from a 1x1 matrix to Scalar is not supported. + for (Index i = 0; i <= q - 2; ++i) { //"Normalization factor" - h = V.block(Nj, i, N, 1).adjoint() * V.block(Nj, i, N, 1); + Scalar h = V.col(i).segment(Nj, N).squaredNorm(); //"How much do u and V have in common?" - h = V.block(Nj, i, N, 1).adjoint() * u.segment(Nj, N) / h(0, 0); + h = V.col(i).segment(Nj, N).dot(u.segment(Nj, N)) / h; //"Subtract the part they have in common" - u.head(Nj_plus_1) -= h(0, 0) * V.block(0, i, Nj_plus_1, 1); - //std::cout << "h\n" << h << std::endl; + u.head(Nj_plus_1) -= h * V.block(0, i, Nj_plus_1, 1); } - #endif } - #if IDRSTAB_ACCURATE //Normalize u and assign to a column of V Scalar normalization_constant = u.block(Nj, 0, N, 1).norm(); @@ -584,108 +417,32 @@ namespace Eigen } else { - // iters = k; - // x = precond.solve(x); - // tol_error = tol_error / rhs_norm; - - // #if IDRSTAB_DEBUG_INFO >0 - // std::cout << "normalization_constant==0 exit" << std::endl; - // std::cout << "Estimated relative residual: " << tol_error << std::endl; - // #if IDRSTAB_INF_NORM - // std::cout << "True relative residual: " << (mat * x - rhs).template lpNorm() / rhs.template lpNorm() << - // std::endl; - // #else - // std::cout << "True relative residual: " << (mat * x - rhs).norm() / rhs.norm() << std::endl; - // #endif + u.head(Nj_plus_1).setZero(); - //u.head(Nj_plus_1).setRandom(); //TODO: bij gebrek aan beter >_> - //u.head(Nj_plus_1)=u.head(Nj_plus_1)/u.head(Nj_plus_1).norm(); - #if IDRSTAB_DEBUG_INFO >0 - std::cout << "normalization_constant==0" << std::endl; - std::cout << "tol_error/rhs_norm: " <::epsilon()) - //if(tol_error < tol2) + if (tol_error < tol2 || tol_error < 1e4 * NumTraits::epsilon()) { //Just quit, we've converged iters = k; x = precond.solve(x); - tol_error = tol_error / rhs_norm; - tol_error = 0.0; - #if IDRSTAB_DEBUG_INFO >0 - std::cout << "normalization_constant==0 EXIT" << std::endl; - #endif + tol_error = (rhs - mat * x).norm() / rhs_norm; return true; } - //#endif - //TODO: This happens if A is singular. We may need to return the best solution and quit. - //Currently this can happen many many times in a row (although never for the unit tests). - //apply_r_exit = true; - //break; - break_normalization=true; + break_normalization = true; break; - //return true; //TODO - //return false; } V.block(0, q - 1, Nj_plus_1, 1).noalias() = u.head(Nj_plus_1); - // if(break_normalization) - // { - // break; - // } - #else - //Since the segment u.head(Nj_plus_1) is not needed next q-iteration this may be combined into one (Only works for GS method, not MGS): - V.block(0, q - 1, Nj_plus_1, 1).noalias() = u.head(Nj_plus_1) / u.segment(Nj, N).norm(); - #endif - - #if IDRSTAB_DEBUG_INFO >1 - { - std::cout << "New u should be orthonormal to the columns of V" << std::endl; - DenseMatrixTypeCol u_V_orthonormality = V.block(Nj, 0, N, q).adjoint() * u.block(Nj, 0, N, 1); - std::cout << u_V_orthonormality << std::endl; //OK - if (u_V_orthonormality(0, 0) - u_V_orthonormality(0, 0) != 0.0) - { - - std::cout << "Internal state:" << std::endl; - std::cout << "V:\n" << V << std::endl; - std::cout << "U:\n" << U << std::endl; - std::cout << "r:\n" << r << std::endl; - std::cout << "u:\n" << u << std::endl; - std::cout << "x:\n" << x << std::endl; - //std::cout<<"mat:\n "<1 - //This should be identity, since the columns of V are orthonormalized. - std::cout << "Check if the columns of V are orthonormalized" << std::endl; - std::cout << V.block(Nj, 0, N, S).adjoint()* V.block(Nj, 0, N, S) << std::endl; - #endif - if (break_normalization==false) + if (break_normalization == false) { U = V; } - } if (reset_while) { reset_while = false; - #if IDRSTAB_INF_NORM - tol_error = r.head(N).template lpNorm(); - #else tol_error = r.head(N).norm(); - #endif if (tol_error < tol2) { /* @@ -701,6 +458,7 @@ namespace Eigen //Save this in rHat, the storage form for rHat is more suitable for the argmin step than the way r is stored. //In Eigen 3.4 this step can be compactly done via: rHat = r.reshaped(N, L + 1); r.segment(N * L, N).noalias() = mat * precond.solve(r.segment(N * (L - 1), N)); + for (Index i = 0; i <= L; ++i) { rHat.col(i) = r.segment(N * i, N); @@ -709,12 +467,8 @@ namespace Eigen /* The polynomial step */ - #if IDRSTAB_ACCURATE qr_solver.compute(rHat.rightCols(L)); gamma.noalias() = qr_solver.solve(r.head(N)); - #else - gamma.noalias() = (rHat.rightCols(L).adjoint() * rHat.rightCols(L)).llt().solve(rHat.rightCols(L).adjoint() * r.head(N)); - #endif //Update solution and residual using the "minimized residual coefficients" update.noalias() = rHat.leftCols(L) * gamma; @@ -723,53 +477,26 @@ namespace Eigen //Update iteration info ++k; - #if IDRSTAB_INF_NORM - tol_error = r.head(N).template lpNorm(); - #else tol_error = r.head(N).norm(); - #endif - #if IDRSTAB_DEBUG_INFO > 0 - // if(apply_r_exit) - // { - // //u normalization failed, cannot continue with next iteration? - // iters = k; - // x = precond.solve(x); - // tol_error = tol_error / rhs_norm; - // return true; - // } - if(k % 10 == 0) - { - std::cout << "Current residual: " << tol_error / rhs_norm << " iter: " << k << std::endl; - } - #endif + if (tol_error < tol2) { //Slightly early exit by moving the criterion before the update of U, //after the main while loop the result of that calculation would not be needed. - #if IDRSTAB_DEBUG_INFO >1 - std::cout << "tol_error break" << std::endl; - #endif break; } - #if IDRSTAB_ACCURATE - if (repair_flag==false && tol_error > 10 * residual_0) + + if (repair_flag == false && tol_error > 10 * residual_0) { //Sonneveld's repair flag suggestion from [5] //This massively reduces problems with false residual estimates (if they'd occur) repair_flag = true; - #if IDRSTAB_DEBUG_INFO > 0 - std::cout << "repair flag set true, iter: " << k << std::endl; - #endif } if (repair_flag && 1000 * tol_error < residual_0) { r.head(N) = rhs - mat * precond.solve(x); repair_flag = false; - #if IDRSTAB_DEBUG_INFO > 0 - std::cout << "repair flag reset iter: " << k << std::endl; - #endif } - #endif /* U=U0-sum(gamma_j*U_j) @@ -780,7 +507,6 @@ namespace Eigen { U.topRows(N) -= U.block(N * i, 0, N, S) * gamma(i - 1); } - } /* @@ -789,69 +515,54 @@ namespace Eigen iters = k; x = precond.solve(x); tol_error = tol_error / rhs_norm; - #if IDRSTAB_DEBUG_INFO >0 - std::cout << "Final exit" << std::endl; - std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now(); - std::chrono::duration time_span = std::chrono::duration_cast> - (t2 - t1); - std::cout << "Solver time: " << time_span.count() << " seconds" << std::endl; - std::cout << "#iterations: " << k << std::endl; - std::cout << "Estimated relative residual: " << tol_error << std::endl; - #if IDRSTAB_INF_NORM - std::cout << "True relative residual: " << (mat * x - rhs).template lpNorm() / rhs.template lpNorm() << - std::endl; - #else - std::cout << "True relative residual: " << (mat * x - rhs).norm() / rhs.norm() << - std::endl; - #endif - - - #endif - return true; } - } + } // namespace internal - template< typename _MatrixType, - typename _Preconditioner = DiagonalPreconditioner > + template > class IDRStab; namespace internal { - template< typename _MatrixType, typename _Preconditioner> - struct traits > + template + struct traits> { typedef _MatrixType MatrixType; typedef _Preconditioner Preconditioner; }; - } + } // namespace internal - template< typename _MatrixType, typename _Preconditioner> - class IDRStab : public IterativeSolverBase > + template + class IDRStab : public IterativeSolverBase> { - typedef IterativeSolverBase Base; - using Base::matrix; - using Base::m_error; - using Base::m_iterations; - using Base::m_info; - using Base::m_isInitialized; - Index m_L = 2; - Index m_S = 4; - public: - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef _Preconditioner Preconditioner; - - public: - - /** Default constructor. */ - IDRStab() : Base() {} + typedef IterativeSolverBase Base; + using Base::m_error; + using Base::m_info; + using Base::m_isInitialized; + using Base::m_iterations; + using Base::matrix; + Index m_L; + Index m_S; + + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + + public: + /** Default constructor. */ + IDRStab() : Base() + { + m_L = 2; + m_S = 4; + } - /** Initialize the solver with matrix \a A for further \c Ax=b solving. + /** Initialize the solver with matrix \a A for further \c Ax=b solving. This constructor is a shortcut for the default constructor followed by a call to compute(). @@ -861,140 +572,54 @@ namespace Eigen this class becomes invalid. Call compute() to update it with the new matrix A, or modify a copy of A. */ - template - explicit IDRStab(const EigenBase& A) : Base(A.derived()) {} + template + explicit IDRStab(const EigenBase &A) : Base(A.derived()) + { + m_L = 2; + m_S = 4; + } - ~IDRStab() {} + ~IDRStab() {} - /** \internal */ - /** Loops over the number of columns of b and does the following: - 1. Sets the tolerance and maxIterations - 2. Calls the function that has the core solver routine - */ - // template - // void _solve_with_guess_impl(const Rhs& b, Dest& x) const - // { - // _solve_vector_with_guess_impl(b, x); - // } - // using Base::_solve_impl; - // template - // void _solve_impl(const MatrixBase& b, Dest& x) const - // { - - // x.resize(this->rows(), b.cols()); - // x.setZero(); - // //x.setRandom(); - // //TODO:This may break if b contains multiple columns, but is probably better than choosing zeros. - // //Random is more reliable than zeros, one can find cases where MATLAB's bicgstabl does not converge with a zero guess either, but does with random. - // //Unit tests pass more often with random compared to zero. - // //x = Base::m_preconditioner.solve(b); - // // for (Index i = 0; i < b.cols(); ++i) - // // { - // // x.col(i) = Base::m_preconditioner.solve(b.col(i)); - // // } - // _solve_with_guess_impl(b, x); - // } - - template - void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const - { - m_iterations = Base::maxIterations(); - m_error = Base::m_tolerance; - bool ret = internal::idrstab(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, - m_L, m_S); - - m_info = (!ret) ? NumericalIssue - : m_error <= 10 * Base::m_tolerance ? Success - : NoConvergence; - - #if IDRSTAB_DEBUG_INFO >0 - std::cout << "Matrix size: " << b.rows() << std::endl; - std::cout << "ret: " << ret << std::endl; - std::cout << "m_error: " << m_error << std::endl; - std::cout << "Base::m_tolerance: " << Base::m_tolerance << std::endl; - std::cout << "m_info: " << m_info << std::endl; - if (m_info != Success) - { - if (m_error < Base::m_tolerance * 10) - { - std::cout << "Unsatisfactory abort" << std::endl; - } - else - { - std::cout << "Legitimate abort" << std::endl; - } + /** \internal */ - } - #endif - #if SAVE_FAILS - #if IDRSTAB_INF_NORM - if ((b - matrix()*x).template lpNorm() / b.template lpNorm() > 1e-6) - #else - if ((b - matrix()*x).norm() / b.norm() > 1e-6) - #endif - { - #if IDRSTAB_INF_NORM - std::cout << "True residual bad: " << (b - matrix()*x).template lpNorm() / b.template lpNorm() << std::endl; - #else - std::cout << "True residual bad: " << (b - matrix()*x).norm() / b.norm() << std::endl; - #endif - using namespace std::chrono; - typedef typename Dest::Scalar Scalar; - typedef Matrix VectorType; - int64_t timestamp = duration_cast(system_clock::now().time_since_epoch()).count(); - Eigen::SparseMatrix A = matrix(); - Eigen::SparseLU, Eigen::COLAMDOrdering > solver; - // fill A and b; - // Compute the ordering permutation vector from the structural pattern of A - solver.analyzePattern(A); - // Compute the numerical factorization - solver.factorize(A); - //Use the factors to solve the linear system - VectorType x_LU = solver.solve(b); - Eigen::saveMarket(A, "A_" + std::to_string(timestamp)); - Eigen::saveMarketVector(b, "b_" + std::to_string(timestamp)); - Eigen::saveMarketVector(x_LU, "x_LU_" + std::to_string(timestamp)); - Eigen::saveMarketVector(x, "x_IDRSTAB_" + std::to_string(timestamp)); - std::cout << "Segfault: to trigger the debugger" << std::endl; - //*(char*)0 = 0; - } - #endif - - } + template + void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const + { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + bool ret = internal::idrstab(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, + m_L, m_S); + + m_info = (!ret) ? NumericalIssue + : m_error <= 10 * Base::m_tolerance ? Success + : NoConvergence; + } - /** \internal */ - //TODO: Should setL and setS be supported? Or should the defaults L=2,S=4 be adopted? - void setL(Index L) + /** \internal */ + /** Sets the parameter L, indicating the amount of minimize residual steps are used. */ + void setL(Index L) + { + if (L < 1) { - //Truncate to even number - L = (L / 2) * 2; - - //L must be positive - L = L < 1 ? 2 : L; - - //L may not exceed 8 - L = L > 8 ? 8 : L; - - m_L = L == 6 ? 4 : L; + L = 2; } - void setS(Index S) - { - //Truncate to even number - S = (S / 2) * 2; - - //S must be positive - S = S < 1 ? 2 : S; - - //S may not exceed 8 - S = S > 8 ? 8 : S; - m_S = S == 6 ? 4 : S; + m_L = L; + } + /** \internal */ + /** Sets the parameter S, indicating the dimension of the shadow residual space.. */ + void setS(Index S) + { + if (S > 0) + { + m_S = S; } + } - protected: - + protected: }; -} +} // namespace Eigen #endif /* idrstab_h */ -- GitLab From 80d5cf00ecb2dd525dd4b001211d87984ede4fcc Mon Sep 17 00:00:00 2001 From: jwehner Date: Mon, 18 May 2020 11:14:50 +0200 Subject: [PATCH 07/18] Made code C++03 compliant --- Eigen/src/IterativeLinearSolvers/BiCGSTABL.h | 10 +++++----- Eigen/src/IterativeLinearSolvers/IDRStab.h | 16 ++++++++-------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h index d67ab48a8..11df6aa6b 100755 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h @@ -171,8 +171,8 @@ namespace Eigen Furthermore a "group wise update" strategy is used to combine updates, which improves accuracy. */ - Mx = std::max(Mx, zeta); //Maximum norm of residuals since last update of x. - Mr = std::max(Mr, zeta); //Maximum norm of residuals since last computation of the true residual. + Mx = (std::max)(Mx, zeta); //Maximum norm of residuals since last update of x. + Mr = (std::max)(Mr, zeta); //Maximum norm of residuals since last computation of the true residual. if (zeta < delta * zeta0 && zeta0 <= Mx) { @@ -224,14 +224,14 @@ namespace Eigen } // namespace internal template > + typename _Preconditioner = DiagonalPreconditioner > class BiCGSTABL; namespace internal { template - struct traits> + struct traits > { typedef _MatrixType MatrixType; typedef _Preconditioner Preconditioner; @@ -240,7 +240,7 @@ namespace Eigen } // namespace internal template - class BiCGSTABL : public IterativeSolverBase> + class BiCGSTABL : public IterativeSolverBase > { typedef IterativeSolverBase Base; using Base::m_error; diff --git a/Eigen/src/IterativeLinearSolvers/IDRStab.h b/Eigen/src/IterativeLinearSolvers/IDRStab.h index 9e9a21b22..6a2def000 100755 --- a/Eigen/src/IterativeLinearSolvers/IDRStab.h +++ b/Eigen/src/IterativeLinearSolvers/IDRStab.h @@ -28,8 +28,8 @@ Right-preconditioning based on Ref. 3 is implemented here. */ -#ifndef idrstab_h -#define idrstab_h +#ifndef EIGEN_IDRSTAB_H +#define EIGEN_IDRSTAB_H #include #include @@ -146,7 +146,7 @@ namespace Eigen for (Index i = 0; i < col_index; ++i) { //"Normalization factor" (v is normalized already) - auto v = U.col(i).head(N); + VectorType v = U.col(i).head(N); //"How much do v and w have in common?" h_FOM(i, col_index - 1) = v.dot(w); @@ -521,14 +521,14 @@ namespace Eigen } // namespace internal template > + typename _Preconditioner = DiagonalPreconditioner > class IDRStab; namespace internal { - + template - struct traits> + struct traits > { typedef _MatrixType MatrixType; typedef _Preconditioner Preconditioner; @@ -537,7 +537,7 @@ namespace Eigen } // namespace internal template - class IDRStab : public IterativeSolverBase> + class IDRStab : public IterativeSolverBase > { typedef IterativeSolverBase Base; using Base::m_error; @@ -622,4 +622,4 @@ namespace Eigen } // namespace Eigen -#endif /* idrstab_h */ +#endif /* EIGEN_IDRSTAB_H */ -- GitLab From 1693231a3d750675768b67504e1bc9392fd44583 Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 25 May 2020 00:38:42 +0200 Subject: [PATCH 08/18] Complete the list of contributers --- Eigen/src/IterativeLinearSolvers/BiCGSTABL.h | 1 + Eigen/src/IterativeLinearSolvers/IDRStab.h | 1 + 2 files changed, 2 insertions(+) diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h index 11df6aa6b..28ba9ce14 100755 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h @@ -3,6 +3,7 @@ // // Copyright (C) 2020 Chris Schoutrop // Copyright (C) 2020 Jens Wehner +// Copyright (C) 2020 Jan van Dijk // Copyright (C) 2020 Adithya Vijaykumar // // This Source Code Form is subject to the terms of the Mozilla diff --git a/Eigen/src/IterativeLinearSolvers/IDRStab.h b/Eigen/src/IterativeLinearSolvers/IDRStab.h index 6a2def000..9f6fb8dca 100755 --- a/Eigen/src/IterativeLinearSolvers/IDRStab.h +++ b/Eigen/src/IterativeLinearSolvers/IDRStab.h @@ -5,6 +5,7 @@ // Copyright (C) 2020 Mischa Senders // Copyright (C) 2020 Lex Kuijpers // Copyright (C) 2020 Jens Wehner +// Copyright (C) 2020 Jan van Dijk // Copyright (C) 2020 Adithya Vijaykumar // // This Source Code Form is subject to the terms of the Mozilla -- GitLab From 4401fd4eb9ac5bc91d3e8281e2041ede939fa331 Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 25 May 2020 15:18:41 +0200 Subject: [PATCH 09/18] Fixed 2 small details that got overlooked when cleaning BiCGSTABL previously --- Eigen/src/IterativeLinearSolvers/BiCGSTABL.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h index 28ba9ce14..7da8505ea 100755 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h @@ -41,7 +41,7 @@ namespace Eigen * \param iters On input the max number of iteration, on output the number of performed iterations. * \param tol_error On input the tolerance error, on output an estimation of the relative error. * \param L On input Number of additional GMRES steps to take. If L is too large (~20) instabilities occur. - * \return false in the case of numerical issue, for example a break down of BiCGSTABL. + * \return false in the case of numerical issue, for example a break down of BiCGSTABL. */ template bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, @@ -85,7 +85,7 @@ namespace Eigen bool bicg_convergence = false; - RealScalar zeta0 = rhs.norm(); + RealScalar zeta0 = r0.norm(); RealScalar zeta = zeta0; RealScalar Mx = zeta0; RealScalar Mr = zeta0; @@ -104,7 +104,7 @@ namespace Eigen { Scalar rho1 = rShadow.dot(rHat.col(j)); - if ((numext::isnan)(rho1) || rho0 != 0.0) + if ((numext::isnan)(rho1) || rho0 == 0.0) { tol_error = zeta / zeta0; return false; @@ -114,14 +114,14 @@ namespace Eigen rho0 = rho1; //Update search directions - uHat.leftCols(j) = rHat.leftCols(j) - beta * uHat.leftCols(j); + uHat.leftCols(j+1) = rHat.leftCols(j) - beta * uHat.leftCols(j+1); uHat.col(j + 1) = precond.solve(mat * uHat.col(j)); alpha = rho1 / (rShadow.dot(uHat.col(j + 1))); //Update residuals //TODO check this block - rHat.leftCols(j) -= alpha * uHat.block(0, 1, N, j); + rHat.leftCols(j+1) -= alpha * uHat.block(0, 1, N, j+1); rHat.col(j + 1) = precond.solve(mat * rHat.col(j)); -- GitLab From d0fc0976a3a95bf839de53f206910508273d9b74 Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 25 May 2020 16:50:48 +0200 Subject: [PATCH 10/18] Fixed a typo that's only apparent with debug enabled --- Eigen/src/IterativeLinearSolvers/BiCGSTABL.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h index 7da8505ea..09ae9ae9a 100755 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h @@ -114,7 +114,7 @@ namespace Eigen rho0 = rho1; //Update search directions - uHat.leftCols(j+1) = rHat.leftCols(j) - beta * uHat.leftCols(j+1); + uHat.leftCols(j+1) = rHat.leftCols(j+1) - beta * uHat.leftCols(j+1); uHat.col(j + 1) = precond.solve(mat * uHat.col(j)); alpha = rho1 / (rShadow.dot(uHat.col(j + 1))); -- GitLab From 974a9e06e3e6524cc08def9c35269a7d38098078 Mon Sep 17 00:00:00 2001 From: jwehner Date: Mon, 27 Jul 2020 15:38:54 +0200 Subject: [PATCH 11/18] moved code to unsupported and fixed MR comments --- Eigen/IterativeLinearSolvers | 2 - Eigen/src/IterativeLinearSolvers/BiCGSTABL.h | 315 --------- Eigen/src/IterativeLinearSolvers/IDRStab.h | 626 ------------------ test/CMakeLists.txt | 2 - unsupported/Eigen/IterativeSolvers | 6 +- .../Eigen/src/IterativeSolvers/BiCGSTABL.h | 285 ++++++++ .../Eigen/src/IterativeSolvers/IDRStab.h | 569 ++++++++++++++++ unsupported/test/CMakeLists.txt | 2 + {test => unsupported/test}/bicgstabl.cpp | 4 +- {test => unsupported/test}/idrstab.cpp | 4 +- 10 files changed, 865 insertions(+), 950 deletions(-) delete mode 100755 Eigen/src/IterativeLinearSolvers/BiCGSTABL.h delete mode 100755 Eigen/src/IterativeLinearSolvers/IDRStab.h create mode 100755 unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h create mode 100755 unsupported/Eigen/src/IterativeSolvers/IDRStab.h rename {test => unsupported/test}/bicgstabl.cpp (93%) rename {test => unsupported/test}/idrstab.cpp (94%) diff --git a/Eigen/IterativeLinearSolvers b/Eigen/IterativeLinearSolvers index 0320895ea..7cae51df1 100644 --- a/Eigen/IterativeLinearSolvers +++ b/Eigen/IterativeLinearSolvers @@ -40,8 +40,6 @@ #include "src/IterativeLinearSolvers/ConjugateGradient.h" #include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" #include "src/IterativeLinearSolvers/BiCGSTAB.h" -#include "src/IterativeLinearSolvers/IDRStab.h" -#include "src/IterativeLinearSolvers/BiCGSTABL.h" #include "src/IterativeLinearSolvers/IncompleteLUT.h" #include "src/IterativeLinearSolvers/IncompleteCholesky.h" diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h b/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h deleted file mode 100755 index 09ae9ae9a..000000000 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTABL.h +++ /dev/null @@ -1,315 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2020 Chris Schoutrop -// Copyright (C) 2020 Jens Wehner -// Copyright (C) 2020 Jan van Dijk -// Copyright (C) 2020 Adithya Vijaykumar -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -/* - - This implementation of BiCGStab(L) is based on the papers - General algorithm: - 1. G.L.G. Sleijpen, D.R. Fokkema. (1993). BiCGstab(l) for linear equations involving unsymmetric matrices with complex spectrum. Electronic Transactions on Numerical Analysis. - Polynomial step update: - 2. G.L.G. Sleijpen, M.B. Van Gijzen. (2010) Exploiting BiCGstab(l) strategies to induce dimension reduction SIAM Journal on Scientific Computing. - 3. Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving linear systems of equations. Universiteit Utrecht. Mathematisch Instituut, 1996 -*/ - -#ifndef EIGEN_BICGSTABL_H -#define EIGEN_BICGSTABL_H - -#include -#include -#include - -namespace Eigen -{ - - namespace internal - { - /** \internal Low-level bi conjugate gradient stabilized algorithm with L additional residual minimization steps - * \param mat The matrix A - * \param rhs The right hand side vector b - * \param x On input and initial solution, on output the computed solution. - * \param precond A preconditioner being able to efficiently solve for an - * approximation of Ax=b (regardless of b) - * \param iters On input the max number of iteration, on output the number of performed iterations. - * \param tol_error On input the tolerance error, on output an estimation of the relative error. - * \param L On input Number of additional GMRES steps to take. If L is too large (~20) instabilities occur. - * \return false in the case of numerical issue, for example a break down of BiCGSTABL. - */ - template - bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, - const Preconditioner &precond, Index &iters, - typename Dest::RealScalar &tol_error, Index L) - { - using std::abs; - using std::sqrt; - typedef typename Dest::RealScalar RealScalar; - typedef typename Dest::Scalar Scalar; - Index N = rhs.size(); - L = L < x.rows() ? L : x.rows(); - - Index k = 0; - - RealScalar tol = tol_error; - Index maxIters = iters; - - typedef Matrix VectorType; - typedef Matrix DenseMatrixType; - - //We start with an initial guess x_0 and let us set r_0 as (residual calculated from x_0) - VectorType r0 = precond.solve(rhs - mat * x); //r_0 - //rShadow is arbritary, but must not be orthogonal to r0. - VectorType rShadow = r0; - - VectorType x_prime = x; - x.setZero(); - VectorType b_prime = r0; - - //Other vectors and scalars initialization - Scalar rho0 = 1.0; - Scalar alpha = 0.0; - Scalar omega = 1.0; - - DenseMatrixType rHat(N, L + 1); - DenseMatrixType uHat(N, L + 1); - - rHat.col(0) = r0; - uHat.col(0).fill(0.0); - - bool bicg_convergence = false; - - RealScalar zeta0 = r0.norm(); - RealScalar zeta = zeta0; - RealScalar Mx = zeta0; - RealScalar Mr = zeta0; - - //Criterion for when to apply the group-wise update, conform ref 3. - const RealScalar delta = 0.01; - - bool compute_res = false; - bool update_app = false; - - while (zeta > tol * zeta0 && k < maxIters) - { - rho0 *= -omega; - - for (Index j = 0; j < L; ++j) - { - Scalar rho1 = rShadow.dot(rHat.col(j)); - - if ((numext::isnan)(rho1) || rho0 == 0.0) - { - tol_error = zeta / zeta0; - return false; - } - - Scalar beta = alpha * (rho1 / rho0); - rho0 = rho1; - - //Update search directions - uHat.leftCols(j+1) = rHat.leftCols(j+1) - beta * uHat.leftCols(j+1); - - uHat.col(j + 1) = precond.solve(mat * uHat.col(j)); - alpha = rho1 / (rShadow.dot(uHat.col(j + 1))); - - //Update residuals - //TODO check this block - rHat.leftCols(j+1) -= alpha * uHat.block(0, 1, N, j+1); - - rHat.col(j + 1) = precond.solve(mat * rHat.col(j)); - - //Complete BiCG iteration by updating x - x += alpha * uHat.col(0); - - //Check for early exit - zeta = rHat.col(0).norm(); - - if (zeta < tol * zeta0) - { - /* - Convergence was achieved during BiCG step. - Without this check BiCGStab(L) fails for trivial matrices, such as when the preconditioner already is the inverse, - or the input matrix is identity. - */ - bicg_convergence = true; - break; - } - } - - if (bicg_convergence == false) - { - /* - The polynomial/minimize residual step. - - QR Householder method for argmin is more stable than (modified) Gram-Schmidt, in the sense that there is less loss of orthogonality. - It is more accurate than solving the normal equations, since the normal equations scale with condition number squared. - */ - VectorType gamma = (rHat.rightCols(L)).householderQr().solve(rHat.col(0)); - x += rHat.leftCols(L) * gamma; - rHat.col(0) -= rHat.rightCols(L) * gamma; - uHat.col(0) -= uHat.rightCols(L) * gamma; - zeta = rHat.col(0).norm(); - omega = gamma(L - 1); - } - - //TODO: Duplicate update code can be removed for the L=1 and L!=1 case. - //TODO: Use analytical expression instead of householder for L=1. - k++; - - /* - Reliable update part - - The recursively computed residual can deviate from the actual residual after several iterations. However, computing the - residual from the definition costs extra MVs and should not be done at each iteration. - The reliable update strategy computes the true residual from the definition: r=b-A*x at strategic intervals. - Furthermore a "group wise update" strategy is used to combine updates, which improves accuracy. - */ - - Mx = (std::max)(Mx, zeta); //Maximum norm of residuals since last update of x. - Mr = (std::max)(Mr, zeta); //Maximum norm of residuals since last computation of the true residual. - - if (zeta < delta * zeta0 && zeta0 <= Mx) - { - update_app = true; - } - - if (update_app || (zeta < delta * Mr && zeta0 <= Mr)) - { - compute_res = true; - } - - if (bicg_convergence) - { - update_app = true; - compute_res = true; - bicg_convergence = false; - } - - if (compute_res) - { - //rHat.col(0) = b_prime - mat * x; //Fokkema paper pseudocode - //Fokkema paper Fortan code L250-254 - rHat.col(0) = b_prime - precond.solve(mat * x); - zeta = rHat.col(0).norm(); - Mr = zeta; - - if (update_app) - { - //After the group wise update, the original problem is translated to a shifted one. - x_prime += x; - x.setZero(); - b_prime = rHat.col(0); - Mx = zeta; - } - } - - compute_res = false; - update_app = false; - } - - //Convert internal variable to the true solution vector x - x += x_prime; - tol_error = zeta / zeta0; - iters = k; - - return true; - } - - } // namespace internal - - template > - class BiCGSTABL; - - namespace internal - { - - template - struct traits > - { - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; - }; - - } // namespace internal - - template - class BiCGSTABL : public IterativeSolverBase > - { - typedef IterativeSolverBase Base; - using Base::m_error; - using Base::m_info; - using Base::m_isInitialized; - using Base::m_iterations; - using Base::matrix; - Index m_L; - - public: - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef _Preconditioner Preconditioner; - - public: - /** Default constructor. */ - BiCGSTABL() : Base() { m_L = 2; } - - /** Initialize the solver with matrix \a A for further \c Ax=b solving. - - This constructor is a shortcut for the default constructor followed - by a call to compute(). - - \warning this class stores a reference to the matrix A as well as some - precomputed values that depend on it. Therefore, if \a A is changed - this class becomes invalid. Call compute() to update it with the new - matrix A, or modify a copy of A. - */ - template - explicit BiCGSTABL(const EigenBase &A) : Base(A.derived()) { m_L = 2; } - - ~BiCGSTABL() {} - - /** \internal */ - /** Loops over the number of columns of b and does the following: - 1. sets the tolerence and maxIterations - 2. Calls the function that has the core solver routine - */ - template - void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const - { - m_iterations = Base::maxIterations(); - m_error = Base::m_tolerance; - - bool ret = internal::bicgstabl(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_L); - - m_info = (!ret) ? NumericalIssue - : m_error <= Base::m_tolerance ? Success - : NoConvergence; - } - - /** \internal */ - /** Sets the parameter L, indicating the amount of minimize residual steps are used. */ - void setL(Index L) - { - if (L < 1) - { - L = 2; - } - - m_L = L; - } - - protected: - - }; - -} // namespace Eigen - -#endif /* EIGEN_BICGSTABL_H */ diff --git a/Eigen/src/IterativeLinearSolvers/IDRStab.h b/Eigen/src/IterativeLinearSolvers/IDRStab.h deleted file mode 100755 index 9f6fb8dca..000000000 --- a/Eigen/src/IterativeLinearSolvers/IDRStab.h +++ /dev/null @@ -1,626 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2020 Chris Schoutrop -// Copyright (C) 2020 Mischa Senders -// Copyright (C) 2020 Lex Kuijpers -// Copyright (C) 2020 Jens Wehner -// Copyright (C) 2020 Jan van Dijk -// Copyright (C) 2020 Adithya Vijaykumar -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -/* - - The IDR(S)Stab(L) method is a combination of IDR(S) and BiCGStab(L) - //TODO: elaborate what this improves over BiCGStab here - - Possible optimizations (PO): - -See //PO: notes in the code - - This implementation of IDRStab is based on - 1. Aihara, K., Abe, K., & Ishiwata, E. (2014). A variant of IDRstab with reliable update strategies for solving sparse linear systems. Journal of Computational and Applied Mathematics, 259, 244-258. doi:10.1016/j.cam.2013.08.028 - 2. Aihara, K., Abe, K., & Ishiwata, E. (2015). Preconditioned IDRStab Algorithms for Solving Nonsymmetric Linear Systems. International Journal of Applied Mathematics, 45(3). - 3. Saad, Y. (2003). Iterative Methods for Sparse Linear Systems: Second Edition. Philadelphia, PA: SIAM. - 4. Sonneveld, P., & Van Gijzen, M. B. (2009). IDR(s): A Family of Simple and Fast Algorithms for Solving Large Nonsymmetric Systems of Linear Equations. SIAM Journal on Scientific Computing, 31(2), 1035-1062. doi:10.1137/070685804 - 5. Sonneveld, P. (2012). On the convergence behavior of IDR (s) and related methods. SIAM Journal on Scientific Computing, 34(5), A2576-A2598. - - Right-preconditioning based on Ref. 3 is implemented here. -*/ - -#ifndef EIGEN_IDRSTAB_H -#define EIGEN_IDRSTAB_H - -#include -#include -#include - -namespace Eigen -{ - - namespace internal - { - - template - bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, - const Preconditioner &precond, Index &iters, - typename Dest::RealScalar &tol_error, Index L, Index S) - { - - /* - Setup and type definitions. - */ - using std::abs; - using std::sqrt; - typedef typename Dest::Scalar Scalar; - typedef typename Dest::RealScalar RealScalar; - typedef Matrix VectorType; - typedef Matrix DenseMatrixTypeCol; - typedef Matrix DenseMatrixTypeRow; - - const Index N = x.rows(); - - Index k = 0; //Iteration counter - const Index maxIters = iters; - - const RealScalar rhs_norm = rhs.norm(); - const RealScalar tol2 = tol_error * rhs_norm; - - if (rhs_norm == 0) - { - /* - If b==0, then the exact solution is x=0. - rhs_norm is needed for other calculations anyways, this exit is a freebie. - */ - /* - exit - */ - x.setZero(); - tol_error = 0.0; - return true; - } - //Construct decomposition objects beforehand. - ColPivHouseholderQR qr_solver; - FullPivLU lu_solver; - - if (S >= N || L >= N) - { - /* - The matrix is very small, or the choice of L and S is very poor - in that case solving directly will be best. - */ - /* - Exit - */ - lu_solver.compute(DenseMatrixTypeRow(mat)); - x = lu_solver.solve(rhs); - tol_error = (rhs - mat * x).norm() / rhs_norm; - return true; - } - - //Define maximum sizes to prevent any reallocation later on. - VectorType u(N * (L + 1)); - VectorType r(N * (L + 1)); - DenseMatrixTypeCol V(N * (L + 1), S); - - DenseMatrixTypeCol rHat(N, L + 1); - - VectorType alpha(S); - VectorType gamma(L); - VectorType update(N); - - /* - Main IDRStab algorithm - */ - //Set up the initial residual - r.head(N) = rhs - mat * precond.solve(x); - tol_error = r.head(N).norm(); - - DenseMatrixTypeRow h_FOM(S, S - 1); - h_FOM.setZero(); - - /* - Determine an initial U matrix of size N x S - */ - - DenseMatrixTypeCol U(N * (L + 1), S); - for (Index col_index = 0; col_index < S; ++col_index) - { - //Arnoldi-like process to generate a set of orthogonal vectors spanning {u,A*u,A*A*u,...,A^(S-1)*u}. - //This construction can be combined with the Full Orthogonalization Method (FOM) from Ref.3 to provide a possible early exit with no additional MV. - if (col_index != 0) - { - /* - Original Gram-Schmidt orthogonalization strategy from Ref. 1: - */ - //u.head(N) -= U.topLeftCorner(N, q) * (U.topLeftCorner(N, q).adjoint() * u.head(N)); - - /* - Modified Gram-Schmidt strategy: - Note that GS and MGS are mathematically equivalent, they are NOT numerically equivalent. - - Eventough h is a scalar, converting the dot product to Scalar is not supported: - http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1610 - */ - VectorType w = mat * precond.solve(u.head(N)); - for (Index i = 0; i < col_index; ++i) - { - //"Normalization factor" (v is normalized already) - VectorType v = U.col(i).head(N); - - //"How much do v and w have in common?" - h_FOM(i, col_index - 1) = v.dot(w); - - //"Subtract the part they have in common" - w -= h_FOM(i, col_index - 1) * v; - } - u.head(N) = w; - h_FOM(col_index, col_index - 1) = u.head(N).norm(); - - if (abs(h_FOM(col_index, col_index - 1)) != 0.0) - { - /* - This only happens if u is NOT exactly zero. In case it is exactly zero - it would imply that that this u has no component in the direction of the current residual. - - By then setting u to zero it will not contribute any further (as it should). - Whereas attempting to normalize results in division by zero. - - Such cases occur if: - 1. The basis of dimension 1) - { - /* - Check for early FOM exit. - */ - Scalar beta = r.head(N).norm(); - VectorType e1 = VectorType::Zero(S - 1); - e1(0) = beta; - lu_solver.compute(h_FOM.topLeftCorner(S - 1, S - 1)); - VectorType y = lu_solver.solve(e1); - VectorType x2 = x + U.topLeftCorner(N, S - 1) * y; - - //Using proposition 6.7 in Saad, one MV can be saved to calculate the residual - RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.col(S - 1).head(N)).norm(); - - if (FOM_residual < tol2) - { - /* - Exit, the FOM algorithm was already accurate enough - */ - iters = k; - x = precond.solve(x2); - tol_error = FOM_residual / rhs_norm; - return true; - } - } - - /* - Select an initial (N x S) matrix R0. - 1. Generate random R0, orthonormalize the result. - 2. This results in R0, however to save memory and compute we only need the adjoint of R0. This is given by the matrix R_T.\ - Additionally, the matrix (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the anti-distributivity property of the adjoint. - This results in AR_T, which is constant if R_T does not have to be regenerated and can be precomputed. Based on reference 4, - this has zero probability in exact arithmetic. However in practice it does (extremely infrequently) occur, - most notably for small matrices. - */ - //PO: To save on memory consumption identity can be sparse - //PO: can this be done with colPiv/fullPiv version as well? This would save 1 construction of a HouseholderQR object - - //Original IDRStab and Kensuke choose S random vectors: - HouseholderQR qr(DenseMatrixTypeCol::Random(N, S)); - DenseMatrixTypeRow R_T = (qr.householderQ() * DenseMatrixTypeCol::Identity(N, - S)) - .adjoint(); - DenseMatrixTypeRow AR_T = DenseMatrixTypeRow(R_T * mat); - - //Pre-allocate sigma, this space will be recycled without additional allocations. - DenseMatrixTypeCol sigma(S, S); - - Index rt_counter = k; //Iteration at which R_T was reset last - bool reset_while = false; //Should the while loop be reset for some reason? - - VectorType Q(S, 1); //Vector containing the row-scaling applied to sigma - VectorType P(S, 1); //Vector containing the column-scaling applied to sigma - DenseMatrixTypeCol QAP(S, S); //Scaled sigma - bool repair_flag = false; - RealScalar residual_0 = tol_error; - //bool apply_r_exit = false; - - while (k < maxIters) - { - - for (Index j = 1; j <= L; ++j) - { - //Cache some indexing variables that occur frequently and are constant. - const Index Nj = N * j; - const Index Nj_plus_1 = N * (j + 1); - const Index Nj_min_1 = N * (j - 1); - - /* - The IDR Step - */ - //Construction of the sigma-matrix, and the decomposition of sigma. - for (Index i = 0; i < S; ++i) - { - sigma.col(i).noalias() = AR_T * precond.solve(U.block(Nj_min_1, i, N, 1)); - } - /* - Suspected is that sigma could be badly scaled, since causes alpha~=0, but the - update vector is not zero. To improve stability we scale with absolute row and col sums first. - Sigma can become badly scaled (but still well-conditioned). - A bad sigma also happens if R_T is not chosen properly, for example if R_T is zeros sigma would be zeros as well. - The effect of this is a left-right preconditioner, instead of solving Ax=b, we solve - Q*A*P*inv(P)*x=Q*b. - */ - - Q = (sigma.cwiseAbs().rowwise().sum()).cwiseInverse(); //Calculate absolute inverse row sum - QAP = Q.asDiagonal() * sigma; //Scale with inverse absolute row sums - P = (QAP.cwiseAbs().colwise().sum()).cwiseInverse(); //Calculate absolute inverse column sum - QAP = QAP * P.asDiagonal(); //Scale with inverse absolute column sums - lu_solver.compute(QAP); - //Obtain the update coefficients alpha - if (j == 1) - { - //alpha=inverse(sigma)*(R_T*r_0); - alpha.noalias() = lu_solver.solve(Q.asDiagonal() * R_T * r.head(N)); - } - else - { - //alpha=inverse(sigma)*(AR_T*r_{j-2}) - alpha.noalias() = lu_solver.solve(Q.asDiagonal() * AR_T * precond.solve(r.segment(N * (j - 2), N))); - } - //Unscale the solution - alpha = P.asDiagonal() * alpha; - - double old_res = tol_error; - - //Obtain new solution and residual from this update - update.noalias() = U.topRows(N) * alpha; - r.head(N) -= mat * precond.solve(update); - x += update; - - for (Index i = 1; i <= j - 2; ++i) - { - //This only affects the case L>2 - r.segment(N * i, N) -= U.block(N * (i + 1), 0, N, S) * alpha; - } - if (j > 1) - { - //r=[r;A*r_{j-2}] - r.segment(Nj_min_1, N).noalias() = mat * precond.solve(r.segment(N * (j - 2), N)); - } - tol_error = r.head(N).norm(); - - if (tol_error < tol2) - { - //If at this point the algorithm has converged, exit. - reset_while = true; - break; - } - - if (repair_flag == false && tol_error > 10 * residual_0) - { - //Sonneveld's repair flag suggestion from [5] - //This massively reduces problems with false residual estimates (if they'd occur) - repair_flag = true; - } - if (repair_flag && 1000 * tol_error < residual_0) - { - //1000 comes from Sonneveld's repair flag suggestion from [5] - r.head(N) = rhs - mat * precond.solve(x); - repair_flag = false; - } - - bool reset_R_T = false; - if (alpha.norm() * rhs_norm < S * NumTraits::epsilon() * old_res) - { - //This would indicate the update computed by alpha did nothing much to decrease the residual - //apparantly we've also not converged either. - //TODO: Check if there is some better criterion, the current one is a bit handwavy. - reset_R_T = true; - } - - if (reset_R_T) - { - if (k - rt_counter > 0) - { - /* - Only regenerate if it didn't already happen this iteration. - */ - //Choose new R0 and try again - qr.compute(DenseMatrixTypeCol::Random(N, S)); - R_T = (qr.householderQ() * DenseMatrixTypeCol::Identity(N, S)).transpose(); //.adjoint() vs .transpose() makes no difference, R_T is random anyways. - /* - Additionally, the matrix (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the anti-distributivity property of the adjoint. - This results in AR_T, which can be precomputed. - */ - AR_T = DenseMatrixTypeRow(R_T * mat); - j = 0; //WARNING reset the for loop counter - rt_counter = k; - continue; - } - } - bool break_normalization = false; - for (Index q = 1; q <= S; ++q) - { - if (q == 1) - { - //u = r; - u.head(Nj_plus_1) = r.topRows(Nj_plus_1); - } - else - { - //u=[u_1;u_2;...;u_j] - u.head(Nj) = u.segment(N, Nj); - } - //Obtain the update coefficients beta implicitly - //beta=lu_sigma.solve(AR_T * u.block(Nj_min_1, 0, N, 1) - - u.head(Nj) -= U.topRows(Nj) * P.asDiagonal() * lu_solver.solve(Q.asDiagonal() * AR_T * precond.solve(u.segment(Nj_min_1, N))); - - //u=[u;Au_{j-1}] - u.segment(Nj, N).noalias() = mat * precond.solve(u.segment(Nj_min_1, N)); - - //Orthonormalize u_j to the columns of V_j(:,1:q-1) - if (q > 1) - { - /* - Modified Gram-Schmidt-like procedure to make u orthogonal to the columns of V from Ref. 1. - - The vector mu from Ref. 1 is obtained implicitly: - mu=V.block(Nj, 0, N, q - 1).adjoint() * u.block(Nj, 0, N, 1). - */ - - for (Index i = 0; i <= q - 2; ++i) - { - //"Normalization factor" - Scalar h = V.col(i).segment(Nj, N).squaredNorm(); - - //"How much do u and V have in common?" - h = V.col(i).segment(Nj, N).dot(u.segment(Nj, N)) / h; - - //"Subtract the part they have in common" - u.head(Nj_plus_1) -= h * V.block(0, i, Nj_plus_1, 1); - } - } - //Normalize u and assign to a column of V - Scalar normalization_constant = u.block(Nj, 0, N, 1).norm(); - - if (normalization_constant != 0.0) - { - /* - If u is exactly zero, this will lead to a NaN. Small, non-zero u is fine. In the case of NaN the algorithm breaks down, - eventhough it could have continued, since u zero implies that there is no further update in a given direction. - */ - u.head(Nj_plus_1) /= normalization_constant; - } - else - { - - u.head(Nj_plus_1).setZero(); - if (tol_error < tol2 || tol_error < 1e4 * NumTraits::epsilon()) - { - //Just quit, we've converged - iters = k; - x = precond.solve(x); - tol_error = (rhs - mat * x).norm() / rhs_norm; - return true; - } - break_normalization = true; - break; - } - - V.block(0, q - 1, Nj_plus_1, 1).noalias() = u.head(Nj_plus_1); - } - - if (break_normalization == false) - { - U = V; - } - } - if (reset_while) - { - reset_while = false; - tol_error = r.head(N).norm(); - if (tol_error < tol2) - { - /* - Slightly early exit by moving the criterion before the update of U, - after the main while loop the result of that calculation would not be needed. - */ - break; - } - continue; - } - - //r=[r;mat*r_{L-1}] - //Save this in rHat, the storage form for rHat is more suitable for the argmin step than the way r is stored. - //In Eigen 3.4 this step can be compactly done via: rHat = r.reshaped(N, L + 1); - r.segment(N * L, N).noalias() = mat * precond.solve(r.segment(N * (L - 1), N)); - - for (Index i = 0; i <= L; ++i) - { - rHat.col(i) = r.segment(N * i, N); - } - - /* - The polynomial step - */ - qr_solver.compute(rHat.rightCols(L)); - gamma.noalias() = qr_solver.solve(r.head(N)); - - //Update solution and residual using the "minimized residual coefficients" - update.noalias() = rHat.leftCols(L) * gamma; - x += update; - r.head(N) -= mat * precond.solve(update); - - //Update iteration info - ++k; - tol_error = r.head(N).norm(); - - if (tol_error < tol2) - { - //Slightly early exit by moving the criterion before the update of U, - //after the main while loop the result of that calculation would not be needed. - break; - } - - if (repair_flag == false && tol_error > 10 * residual_0) - { - //Sonneveld's repair flag suggestion from [5] - //This massively reduces problems with false residual estimates (if they'd occur) - repair_flag = true; - } - if (repair_flag && 1000 * tol_error < residual_0) - { - r.head(N) = rhs - mat * precond.solve(x); - repair_flag = false; - } - - /* - U=U0-sum(gamma_j*U_j) - Consider the first iteration. Then U only contains U0, so at the start of the while-loop - U should be U0. Therefore only the first N rows of U have to be updated. - */ - for (Index i = 1; i <= L; ++i) - { - U.topRows(N) -= U.block(N * i, 0, N, S) * gamma(i - 1); - } - } - - /* - Exit after the while loop terminated. - */ - iters = k; - x = precond.solve(x); - tol_error = tol_error / rhs_norm; - return true; - } - - } // namespace internal - - template > - class IDRStab; - - namespace internal - { - - template - struct traits > - { - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; - }; - - } // namespace internal - - template - class IDRStab : public IterativeSolverBase > - { - typedef IterativeSolverBase Base; - using Base::m_error; - using Base::m_info; - using Base::m_isInitialized; - using Base::m_iterations; - using Base::matrix; - Index m_L; - Index m_S; - - public: - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef _Preconditioner Preconditioner; - - public: - /** Default constructor. */ - IDRStab() : Base() - { - m_L = 2; - m_S = 4; - } - - /** Initialize the solver with matrix \a A for further \c Ax=b solving. - - This constructor is a shortcut for the default constructor followed - by a call to compute(). - - \warning this class stores a reference to the matrix A as well as some - precomputed values that depend on it. Therefore, if \a A is changed - this class becomes invalid. Call compute() to update it with the new - matrix A, or modify a copy of A. - */ - template - explicit IDRStab(const EigenBase &A) : Base(A.derived()) - { - m_L = 2; - m_S = 4; - } - - ~IDRStab() {} - - /** \internal */ - - template - void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const - { - m_iterations = Base::maxIterations(); - m_error = Base::m_tolerance; - bool ret = internal::idrstab(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, - m_L, m_S); - - m_info = (!ret) ? NumericalIssue - : m_error <= 10 * Base::m_tolerance ? Success - : NoConvergence; - } - - /** \internal */ - /** Sets the parameter L, indicating the amount of minimize residual steps are used. */ - void setL(Index L) - { - if (L < 1) - { - L = 2; - } - - m_L = L; - } - /** \internal */ - /** Sets the parameter S, indicating the dimension of the shadow residual space.. */ - void setS(Index S) - { - if (S > 0) - { - m_S = S; - } - } - - protected: - }; - -} // namespace Eigen - -#endif /* EIGEN_IDRSTAB_H */ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3c9b73445..43ad0b240 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -265,8 +265,6 @@ ei_add_test(simplicial_cholesky) ei_add_test(conjugate_gradient) ei_add_test(incomplete_cholesky) ei_add_test(bicgstab) -ei_add_test(bicgstabl) -ei_add_test(idrstab) ei_add_test(lscg) ei_add_test(sparselu) ei_add_test(sparseqr) diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers index 0fa129a7b..d45444a52 100644 --- a/unsupported/Eigen/IterativeSolvers +++ b/unsupported/Eigen/IterativeSolvers @@ -14,12 +14,15 @@ #include "../../Eigen/Jacobi" #include "../../Eigen/Householder" + /** * \defgroup IterativeSolvers_Module Iterative solvers module * This module aims to provide various iterative linear and non linear solver algorithms. * It currently provides: * - a constrained conjugate gradient * - a Householder GMRES implementation + * - a BiCGSTAB(L) implementation + * - a IDRStab implementation * \code * #include * \endcode @@ -38,7 +41,8 @@ #include "src/IterativeSolvers/DGMRES.h" //#include "src/IterativeSolvers/SSORPreconditioner.h" #include "src/IterativeSolvers/MINRES.h" - +#include "src/IterativeSolvers/IDRStab.h" +#include "src/IterativeSolvers/BiCGSTABL.h" #include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" //@} diff --git a/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h b/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h new file mode 100755 index 000000000..f90874368 --- /dev/null +++ b/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h @@ -0,0 +1,285 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2020 Chris Schoutrop +// Copyright (C) 2020 Jens Wehner +// Copyright (C) 2020 Jan van Dijk +// Copyright (C) 2020 Adithya Vijaykumar +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + This implementation of BiCGStab(L) is based on the papers + General algorithm: + 1. G.L.G. Sleijpen, D.R. Fokkema. (1993). BiCGstab(l) for linear equations involving unsymmetric + matrices with complex spectrum. Electronic Transactions on Numerical Analysis. Polynomial step update: + 2. G.L.G. Sleijpen, M.B. Van Gijzen. (2010) Exploiting BiCGstab(l) strategies to induce dimension + reduction SIAM Journal on Scientific Computing. + 3. Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving linear systems of equations. + Universiteit Utrecht. Mathematisch Instituut, 1996 +*/ + +#ifndef EIGEN_BICGSTABL_H +#define EIGEN_BICGSTABL_H + +namespace Eigen { + +namespace internal { +/** \internal Low-level bi conjugate gradient stabilized algorithm with L additional residual minimization steps + * \param mat The matrix A + * \param rhs The right hand side vector b + * \param x On input and initial solution, on output the computed solution. + * \param precond A preconditioner being able to efficiently solve for an + * approximation of Ax=b (regardless of b) + * \param iters On input the max number of iteration, on output the number of performed iterations. + * \param tol_error On input the tolerance error, on output an estimation of the relative error. + * \param L On input Number of additional GMRES steps to take. If L is too large (~20) instabilities occur. + * \return false in the case of numerical issue, for example a break down of BiCGSTABL. + */ +template +bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditioner &precond, Index &iters, + typename Dest::RealScalar &tol_error, Index L) { + using numext::abs; + using numext::sqrt; + typedef typename Dest::RealScalar RealScalar; + typedef typename Dest::Scalar Scalar; + Index N = rhs.size(); + L = L < x.rows() ? L : x.rows(); + + Index k = 0; + + RealScalar tol = tol_error; + Index maxIters = iters; + + typedef Matrix VectorType; + typedef Matrix DenseMatrixType; + + // We start with an initial guess x_0 and let us set r_0 as (residual calculated from x_0) + VectorType r0 = precond.solve(rhs - mat * x); // r_0 + // rShadow is arbritary, but must not be orthogonal to r0. + VectorType rShadow = r0; + + VectorType x_prime = x; + x.setZero(); + VectorType b_prime = r0; + + // Other vectors and scalars initialization + Scalar rho0 = 1.0; + Scalar alpha = 0.0; + Scalar omega = 1.0; + + DenseMatrixType rHat(N, L + 1); + DenseMatrixType uHat(N, L + 1); + + rHat.col(0) = r0; + uHat.col(0).fill(0.0); + + bool bicg_convergence = false; + + RealScalar zeta0 = r0.norm(); + RealScalar zeta = zeta0; + RealScalar Mx = zeta0; + RealScalar Mr = zeta0; + + // Criterion for when to apply the group-wise update, conform ref 3. + const RealScalar delta = 0.01; + + bool compute_res = false; + bool update_app = false; + + while (zeta > tol * zeta0 && k < maxIters) { + rho0 *= -omega; + + for (Index j = 0; j < L; ++j) { + Scalar rho1 = rShadow.dot(rHat.col(j)); + + if ((numext::isnan)(rho1) || rho0 == 0.0) { + tol_error = zeta / zeta0; + return false; + } + Scalar beta = alpha * (rho1 / rho0); + rho0 = rho1; + // Update search directions + uHat.leftCols(j + 1) = rHat.leftCols(j + 1) - beta * uHat.leftCols(j + 1); + uHat.col(j + 1) = precond.solve(mat * uHat.col(j)); + alpha = rho1 / (rShadow.dot(uHat.col(j + 1))); + // Update residuals + // TODO check this block + rHat.leftCols(j + 1) -= alpha * uHat.block(0, 1, N, j + 1); + rHat.col(j + 1) = precond.solve(mat * rHat.col(j)); + // Complete BiCG iteration by updating x + x += alpha * uHat.col(0); + // Check for early exit + zeta = rHat.col(0).norm(); + + if (zeta < tol * zeta0) { + /* + Convergence was achieved during BiCG step. + Without this check BiCGStab(L) fails for trivial matrices, such as when the preconditioner already is + the inverse, or the input matrix is identity. + */ + bicg_convergence = true; + break; + } + } + + if (bicg_convergence == false) { + /* + The polynomial/minimize residual step. + + QR Householder method for argmin is more stable than (modified) Gram-Schmidt, in the sense that there is + less loss of orthogonality. It is more accurate than solving the normal equations, since the normal equations + scale with condition number squared. + */ + VectorType gamma = (rHat.rightCols(L)).householderQr().solve(rHat.col(0)); + x += rHat.leftCols(L) * gamma; + rHat.col(0) -= rHat.rightCols(L) * gamma; + uHat.col(0) -= uHat.rightCols(L) * gamma; + zeta = rHat.col(0).norm(); + omega = gamma(L - 1); + } + + // TODO: Duplicate update code can be removed for the L=1 and L!=1 case. + // TODO: Use analytical expression instead of householder for L=1. + k++; + + /* + Reliable update part + + The recursively computed residual can deviate from the actual residual after several iterations. However, + computing the residual from the definition costs extra MVs and should not be done at each iteration. The reliable + update strategy computes the true residual from the definition: r=b-A*x at strategic intervals. Furthermore a + "group wise update" strategy is used to combine updates, which improves accuracy. + */ + + Mx = (std::max)(Mx, zeta); // Maximum norm of residuals since last update of x. + Mr = (std::max)(Mr, zeta); // Maximum norm of residuals since last computation of the true residual. + + if (zeta < delta * zeta0 && zeta0 <= Mx) { + update_app = true; + } + + if (update_app || (zeta < delta * Mr && zeta0 <= Mr)) { + compute_res = true; + } + + if (bicg_convergence) { + update_app = true; + compute_res = true; + bicg_convergence = false; + } + + if (compute_res) { + // rHat.col(0) = b_prime - mat * x; //Fokkema paper pseudocode + // Fokkema paper Fortan code L250-254 + rHat.col(0) = b_prime - precond.solve(mat * x); + zeta = rHat.col(0).norm(); + Mr = zeta; + + if (update_app) { + // After the group wise update, the original problem is translated to a shifted one. + x_prime += x; + x.setZero(); + b_prime = rHat.col(0); + Mx = zeta; + } + } + + compute_res = false; + update_app = false; + } + + // Convert internal variable to the true solution vector x + x += x_prime; + tol_error = zeta / zeta0; + iters = k; + + return true; +} + +} // namespace internal + +template > +class BiCGSTABL; + +namespace internal { + +template +struct traits > { + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; +}; + +} // namespace internal + +template +class BiCGSTABL : public IterativeSolverBase > { + typedef IterativeSolverBase Base; + using Base::m_error; + using Base::m_info; + using Base::m_isInitialized; + using Base::m_iterations; + using Base::matrix; + Index m_L; + + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + + public: + /** Default constructor. */ + BiCGSTABL() : Base() { m_L = 2; } + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + + This constructor is a shortcut for the default constructor followed + by a call to compute(). + + \warning this class stores a reference to the matrix A as well as some + precomputed values that depend on it. Therefore, if \a A is changed + this class becomes invalid. Call compute() to update it with the new + matrix A, or modify a copy of A. + */ + template + explicit BiCGSTABL(const EigenBase &A) : Base(A.derived()) { + m_L = 2; + } + + ~BiCGSTABL() {} + + /** \internal */ + /** Loops over the number of columns of b and does the following: + 1. sets the tolerence and maxIterations + 2. Calls the function that has the core solver routine + */ + template + void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + + bool ret = internal::bicgstabl(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_L); + + m_info = (!ret) ? NumericalIssue : m_error <= Base::m_tolerance ? Success : NoConvergence; + } + + /** \internal */ + /** Sets the parameter L, indicating the amount of minimize residual steps are used. */ + void setL(Index L) { + if (L < 1) { + L = 2; + } + + m_L = L; + } + + protected: +}; + +} // namespace Eigen + +#endif /* EIGEN_BICGSTABL_H */ diff --git a/unsupported/Eigen/src/IterativeSolvers/IDRStab.h b/unsupported/Eigen/src/IterativeSolvers/IDRStab.h new file mode 100755 index 000000000..59aecd44b --- /dev/null +++ b/unsupported/Eigen/src/IterativeSolvers/IDRStab.h @@ -0,0 +1,569 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2020 Chris Schoutrop +// Copyright (C) 2020 Mischa Senders +// Copyright (C) 2020 Lex Kuijpers +// Copyright (C) 2020 Jens Wehner +// Copyright (C) 2020 Jan van Dijk +// Copyright (C) 2020 Adithya Vijaykumar +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +/* + + The IDR(S)Stab(L) method is a combination of IDR(S) and BiCGStab(L) + //TODO: elaborate what this improves over BiCGStab here + + Possible optimizations (PO): + -See //PO: notes in the code + + This implementation of IDRStab is based on + 1. Aihara, K., Abe, K., & Ishiwata, E. (2014). A variant of IDRstab with reliable update strategies for + solving sparse linear systems. Journal of Computational and Applied Mathematics, 259, 244-258. + doi:10.1016/j.cam.2013.08.028 + 2. Aihara, K., Abe, K., & Ishiwata, E. (2015). Preconditioned IDRStab Algorithms for Solving + Nonsymmetric Linear Systems. International Journal of Applied Mathematics, 45(3). + 3. Saad, Y. (2003). Iterative Methods for Sparse Linear Systems: Second Edition. Philadelphia, PA: SIAM. + 4. Sonneveld, P., & Van Gijzen, M. B. (2009). IDR(s): A Family of Simple and Fast Algorithms for Solving + Large Nonsymmetric Systems of Linear Equations. SIAM Journal on Scientific Computing, 31(2), 1035-1062. + doi:10.1137/070685804 + 5. Sonneveld, P. (2012). On the convergence behavior of IDR (s) and related methods. SIAM Journal on + Scientific Computing, 34(5), A2576-A2598. + + Right-preconditioning based on Ref. 3 is implemented here. +*/ + +#ifndef EIGEN_IDRSTAB_H +#define EIGEN_IDRSTAB_H + +namespace Eigen { + +namespace internal { + +template +bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditioner &precond, Index &iters, + typename Dest::RealScalar &tol_error, Index L, Index S) { + /* + Setup and type definitions. + */ + using numext::abs; + using numext::sqrt; + typedef typename Dest::Scalar Scalar; + typedef typename Dest::RealScalar RealScalar; + typedef Matrix VectorType; + typedef Matrix DenseMatrixTypeCol; + typedef Matrix DenseMatrixTypeRow; + + const Index N = x.rows(); + + Index k = 0; // Iteration counter + const Index maxIters = iters; + + const RealScalar rhs_norm = rhs.norm(); + const RealScalar tol2 = tol_error * rhs_norm; + + if (rhs_norm == 0) { + /* + If b==0, then the exact solution is x=0. + rhs_norm is needed for other calculations anyways, this exit is a freebie. + */ + /* + exit + */ + x.setZero(); + tol_error = 0.0; + return true; + } + // Construct decomposition objects beforehand. + ColPivHouseholderQR qr_solver; + FullPivLU lu_solver; + + if (S >= N || L >= N) { + /* + The matrix is very small, or the choice of L and S is very poor + in that case solving directly will be best. + */ + /* + Exit + */ + lu_solver.compute(DenseMatrixTypeRow(mat)); + x = lu_solver.solve(rhs); + tol_error = (rhs - mat * x).norm() / rhs_norm; + return true; + } + + // Define maximum sizes to prevent any reallocation later on. + VectorType u(N * (L + 1)); + VectorType r(N * (L + 1)); + DenseMatrixTypeCol V(N * (L + 1), S); + + DenseMatrixTypeCol rHat(N, L + 1); + + VectorType alpha(S); + VectorType gamma(L); + VectorType update(N); + + /* + Main IDRStab algorithm + */ + // Set up the initial residual + r.head(N) = rhs - mat * precond.solve(x); + tol_error = r.head(N).norm(); + + DenseMatrixTypeRow h_FOM(S, S - 1); + h_FOM.setZero(); + + /* + Determine an initial U matrix of size N x S + */ + + DenseMatrixTypeCol U(N * (L + 1), S); + for (Index col_index = 0; col_index < S; ++col_index) { + // Arnoldi-like process to generate a set of orthogonal vectors spanning {u,A*u,A*A*u,...,A^(S-1)*u}. + // This construction can be combined with the Full Orthogonalization Method (FOM) from Ref.3 to provide a possible + // early exit with no additional MV. + if (col_index != 0) { + /* + Original Gram-Schmidt orthogonalization strategy from Ref. 1: + */ + // u.head(N) -= U.topLeftCorner(N, q) * (U.topLeftCorner(N, q).adjoint() * u.head(N)); + + /* + Modified Gram-Schmidt strategy: + Note that GS and MGS are mathematically equivalent, they are NOT numerically equivalent. + + Eventough h is a scalar, converting the dot product to Scalar is not supported: + http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1610 + */ + VectorType w = mat * precond.solve(u.head(N)); + for (Index i = 0; i < col_index; ++i) { + //"Normalization factor" (v is normalized already) + VectorType v = U.col(i).head(N); + + //"How much do v and w have in common?" + h_FOM(i, col_index - 1) = v.dot(w); + + //"Subtract the part they have in common" + w -= h_FOM(i, col_index - 1) * v; + } + u.head(N) = w; + h_FOM(col_index, col_index - 1) = u.head(N).norm(); + + if (abs(h_FOM(col_index, col_index - 1)) != 0.0) { + /* + This only happens if u is NOT exactly zero. In case it is exactly zero + it would imply that that this u has no component in the direction of the current residual. + + By then setting u to zero it will not contribute any further (as it should). + Whereas attempting to normalize results in division by zero. + + Such cases occur if: + 1. The basis of dimension 1) { + /* + Check for early FOM exit. + */ + Scalar beta = r.head(N).norm(); + VectorType e1 = VectorType::Zero(S - 1); + e1(0) = beta; + lu_solver.compute(h_FOM.topLeftCorner(S - 1, S - 1)); + VectorType y = lu_solver.solve(e1); + VectorType x2 = x + U.topLeftCorner(N, S - 1) * y; + + // Using proposition 6.7 in Saad, one MV can be saved to calculate the residual + RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.col(S - 1).head(N)).norm(); + + if (FOM_residual < tol2) { + /* + Exit, the FOM algorithm was already accurate enough + */ + iters = k; + x = precond.solve(x2); + tol_error = FOM_residual / rhs_norm; + return true; + } + } + + /* + Select an initial (N x S) matrix R0. + 1. Generate random R0, orthonormalize the result. + 2. This results in R0, however to save memory and compute we only need the adjoint of R0. This is given by the + matrix R_T.\ Additionally, the matrix (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the + anti-distributivity property of the adjoint. This results in AR_T, which is constant if R_T does not have to be + regenerated and can be precomputed. Based on reference 4, this has zero probability in exact arithmetic. However in + practice it does (extremely infrequently) occur, most notably for small matrices. + */ + // PO: To save on memory consumption identity can be sparse + // PO: can this be done with colPiv/fullPiv version as well? This would save 1 construction of a HouseholderQR object + + // Original IDRStab and Kensuke choose S random vectors: + HouseholderQR qr(DenseMatrixTypeCol::Random(N, S)); + DenseMatrixTypeRow R_T = (qr.householderQ() * DenseMatrixTypeCol::Identity(N, S)).adjoint(); + DenseMatrixTypeRow AR_T = DenseMatrixTypeRow(R_T * mat); + + // Pre-allocate sigma, this space will be recycled without additional allocations. + DenseMatrixTypeCol sigma(S, S); + + Index rt_counter = k; // Iteration at which R_T was reset last + bool reset_while = false; // Should the while loop be reset for some reason? + + VectorType Q(S, 1); // Vector containing the row-scaling applied to sigma + VectorType P(S, 1); // Vector containing the column-scaling applied to sigma + DenseMatrixTypeCol QAP(S, S); // Scaled sigma + bool repair_flag = false; + RealScalar residual_0 = tol_error; + // bool apply_r_exit = false; + + while (k < maxIters) { + for (Index j = 1; j <= L; ++j) { + // Cache some indexing variables that occur frequently and are constant. + const Index Nj = N * j; + const Index Nj_plus_1 = N * (j + 1); + const Index Nj_min_1 = N * (j - 1); + + /* + The IDR Step + */ + // Construction of the sigma-matrix, and the decomposition of sigma. + for (Index i = 0; i < S; ++i) { + sigma.col(i).noalias() = AR_T * precond.solve(U.block(Nj_min_1, i, N, 1)); + } + /* + Suspected is that sigma could be badly scaled, since causes alpha~=0, but the + update vector is not zero. To improve stability we scale with absolute row and col sums first. + Sigma can become badly scaled (but still well-conditioned). + A bad sigma also happens if R_T is not chosen properly, for example if R_T is zeros sigma would be zeros + as well. The effect of this is a left-right preconditioner, instead of solving Ax=b, we solve + Q*A*P*inv(P)*x=Q*b. + */ + + Q = (sigma.cwiseAbs().rowwise().sum()).cwiseInverse(); // Calculate absolute inverse row sum + QAP = Q.asDiagonal() * sigma; // Scale with inverse absolute row sums + P = (QAP.cwiseAbs().colwise().sum()).cwiseInverse(); // Calculate absolute inverse column sum + QAP = QAP * P.asDiagonal(); // Scale with inverse absolute column sums + lu_solver.compute(QAP); + // Obtain the update coefficients alpha + if (j == 1) { + // alpha=inverse(sigma)*(R_T*r_0); + alpha.noalias() = lu_solver.solve(Q.asDiagonal() * R_T * r.head(N)); + } else { + // alpha=inverse(sigma)*(AR_T*r_{j-2}) + alpha.noalias() = lu_solver.solve(Q.asDiagonal() * AR_T * precond.solve(r.segment(N * (j - 2), N))); + } + // Unscale the solution + alpha = P.asDiagonal() * alpha; + + double old_res = tol_error; + + // Obtain new solution and residual from this update + update.noalias() = U.topRows(N) * alpha; + r.head(N) -= mat * precond.solve(update); + x += update; + + for (Index i = 1; i <= j - 2; ++i) { + // This only affects the case L>2 + r.segment(N * i, N) -= U.block(N * (i + 1), 0, N, S) * alpha; + } + if (j > 1) { + // r=[r;A*r_{j-2}] + r.segment(Nj_min_1, N).noalias() = mat * precond.solve(r.segment(N * (j - 2), N)); + } + tol_error = r.head(N).norm(); + + if (tol_error < tol2) { + // If at this point the algorithm has converged, exit. + reset_while = true; + break; + } + + if (repair_flag == false && tol_error > 10 * residual_0) { + // Sonneveld's repair flag suggestion from [5] + // This massively reduces problems with false residual estimates (if they'd occur) + repair_flag = true; + } + if (repair_flag && 1000 * tol_error < residual_0) { + // 1000 comes from Sonneveld's repair flag suggestion from [5] + r.head(N) = rhs - mat * precond.solve(x); + repair_flag = false; + } + + bool reset_R_T = false; + if (alpha.norm() * rhs_norm < S * NumTraits::epsilon() * old_res) { + // This would indicate the update computed by alpha did nothing much to decrease the residual + // apparantly we've also not converged either. + // TODO: Check if there is some better criterion, the current one is a bit handwavy. + reset_R_T = true; + } + + if (reset_R_T) { + if (k - rt_counter > 0) { + /* + Only regenerate if it didn't already happen this iteration. + */ + // Choose new R0 and try again + qr.compute(DenseMatrixTypeCol::Random(N, S)); + R_T = (qr.householderQ() * DenseMatrixTypeCol::Identity(N, S)) + .transpose(); //.adjoint() vs .transpose() makes no difference, R_T is random anyways. + /* + Additionally, the matrix (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the + anti-distributivity property of the adjoint. This results in AR_T, which can be precomputed. + */ + AR_T = DenseMatrixTypeRow(R_T * mat); + j = 0; // WARNING reset the for loop counter + rt_counter = k; + continue; + } + } + bool break_normalization = false; + for (Index q = 1; q <= S; ++q) { + if (q == 1) { + // u = r; + u.head(Nj_plus_1) = r.topRows(Nj_plus_1); + } else { + // u=[u_1;u_2;...;u_j] + u.head(Nj) = u.segment(N, Nj); + } + // Obtain the update coefficients beta implicitly + // beta=lu_sigma.solve(AR_T * u.block(Nj_min_1, 0, N, 1) + + u.head(Nj) -= U.topRows(Nj) * P.asDiagonal() * + lu_solver.solve(Q.asDiagonal() * AR_T * precond.solve(u.segment(Nj_min_1, N))); + + // u=[u;Au_{j-1}] + u.segment(Nj, N).noalias() = mat * precond.solve(u.segment(Nj_min_1, N)); + + // Orthonormalize u_j to the columns of V_j(:,1:q-1) + if (q > 1) { + /* + Modified Gram-Schmidt-like procedure to make u orthogonal to the columns of V from Ref. 1. + + The vector mu from Ref. 1 is obtained implicitly: + mu=V.block(Nj, 0, N, q - 1).adjoint() * u.block(Nj, 0, N, 1). + */ + + for (Index i = 0; i <= q - 2; ++i) { + //"Normalization factor" + Scalar h = V.col(i).segment(Nj, N).squaredNorm(); + + //"How much do u and V have in common?" + h = V.col(i).segment(Nj, N).dot(u.segment(Nj, N)) / h; + + //"Subtract the part they have in common" + u.head(Nj_plus_1) -= h * V.block(0, i, Nj_plus_1, 1); + } + } + // Normalize u and assign to a column of V + Scalar normalization_constant = u.block(Nj, 0, N, 1).norm(); + + if (normalization_constant != 0.0) { + /* + If u is exactly zero, this will lead to a NaN. Small, non-zero u is fine. In the case of NaN the + algorithm breaks down, eventhough it could have continued, since u zero implies that there is no further + update in a given direction. + */ + u.head(Nj_plus_1) /= normalization_constant; + } else { + u.head(Nj_plus_1).setZero(); + if (tol_error < tol2 || tol_error < 1e4 * NumTraits::epsilon()) { + // Just quit, we've converged + iters = k; + x = precond.solve(x); + tol_error = (rhs - mat * x).norm() / rhs_norm; + return true; + } + break_normalization = true; + break; + } + + V.block(0, q - 1, Nj_plus_1, 1).noalias() = u.head(Nj_plus_1); + } + + if (break_normalization == false) { + U = V; + } + } + if (reset_while) { + reset_while = false; + tol_error = r.head(N).norm(); + if (tol_error < tol2) { + /* + Slightly early exit by moving the criterion before the update of U, + after the main while loop the result of that calculation would not be needed. + */ + break; + } + continue; + } + + // r=[r;mat*r_{L-1}] + // Save this in rHat, the storage form for rHat is more suitable for the argmin step than the way r is stored. + // In Eigen 3.4 this step can be compactly done via: rHat = r.reshaped(N, L + 1); + r.segment(N * L, N).noalias() = mat * precond.solve(r.segment(N * (L - 1), N)); + + for (Index i = 0; i <= L; ++i) { + rHat.col(i) = r.segment(N * i, N); + } + + /* + The polynomial step + */ + qr_solver.compute(rHat.rightCols(L)); + gamma.noalias() = qr_solver.solve(r.head(N)); + + // Update solution and residual using the "minimized residual coefficients" + update.noalias() = rHat.leftCols(L) * gamma; + x += update; + r.head(N) -= mat * precond.solve(update); + + // Update iteration info + ++k; + tol_error = r.head(N).norm(); + + if (tol_error < tol2) { + // Slightly early exit by moving the criterion before the update of U, + // after the main while loop the result of that calculation would not be needed. + break; + } + + if (repair_flag == false && tol_error > 10 * residual_0) { + // Sonneveld's repair flag suggestion from [5] + // This massively reduces problems with false residual estimates (if they'd occur) + repair_flag = true; + } + if (repair_flag && 1000 * tol_error < residual_0) { + r.head(N) = rhs - mat * precond.solve(x); + repair_flag = false; + } + + /* + U=U0-sum(gamma_j*U_j) + Consider the first iteration. Then U only contains U0, so at the start of the while-loop + U should be U0. Therefore only the first N rows of U have to be updated. + */ + for (Index i = 1; i <= L; ++i) { + U.topRows(N) -= U.block(N * i, 0, N, S) * gamma(i - 1); + } + } + + /* + Exit after the while loop terminated. + */ + iters = k; + x = precond.solve(x); + tol_error = tol_error / rhs_norm; + return true; +} + +} // namespace internal + +template > +class IDRStab; + +namespace internal { + +template +struct traits > { + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; +}; + +} // namespace internal + +template +class IDRStab : public IterativeSolverBase > { + typedef IterativeSolverBase Base; + using Base::m_error; + using Base::m_info; + using Base::m_isInitialized; + using Base::m_iterations; + using Base::matrix; + Index m_L; + Index m_S; + + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + + public: + /** Default constructor. */ + IDRStab() : Base() { + m_L = 2; + m_S = 4; + } + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + + This constructor is a shortcut for the default constructor followed + by a call to compute(). + + \warning this class stores a reference to the matrix A as well as some + precomputed values that depend on it. Therefore, if \a A is changed + this class becomes invalid. Call compute() to update it with the new + matrix A, or modify a copy of A. + */ + template + explicit IDRStab(const EigenBase &A) : Base(A.derived()) { + m_L = 2; + m_S = 4; + } + + ~IDRStab() {} + + /** \internal */ + + template + void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + bool ret = internal::idrstab(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_L, m_S); + + m_info = (!ret) ? NumericalIssue : m_error <= 10 * Base::m_tolerance ? Success : NoConvergence; + } + + /** \internal */ + /** Sets the parameter L, indicating the amount of minimize residual steps are used. */ + void setL(Index L) { + if (L < 1) { + L = 2; + } + + m_L = L; + } + /** \internal */ + /** Sets the parameter S, indicating the dimension of the shadow residual space.. */ + void setS(Index S) { + if (S > 0) { + m_S = S; + } + } + + protected: +}; + +} // namespace Eigen + +#endif /* EIGEN_IDRSTAB_H */ diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 298db04ea..8e91ecda7 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -104,6 +104,8 @@ ei_add_test(splines) ei_add_test(gmres) ei_add_test(dgmres) ei_add_test(minres) +ei_add_test(bicgstabl) +ei_add_test(idrstab) ei_add_test(levenberg_marquardt) ei_add_test(kronecker_product) ei_add_test(bessel_functions) diff --git a/test/bicgstabl.cpp b/unsupported/test/bicgstabl.cpp similarity index 93% rename from test/bicgstabl.cpp rename to unsupported/test/bicgstabl.cpp index 7073f32ec..24c22ce41 100644 --- a/test/bicgstabl.cpp +++ b/unsupported/test/bicgstabl.cpp @@ -7,8 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#include "sparse_solver.h" -#include +#include "../../test/sparse_solver.h" +#include template void test_BiCGSTABL_T() { diff --git a/test/idrstab.cpp b/unsupported/test/idrstab.cpp similarity index 94% rename from test/idrstab.cpp rename to unsupported/test/idrstab.cpp index 6d2ac681d..f7883c1cb 100644 --- a/test/idrstab.cpp +++ b/unsupported/test/idrstab.cpp @@ -7,8 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#include "sparse_solver.h" -#include +#include "../../test/sparse_solver.h" +#include template void test_idrstab_T() { -- GitLab From dbb4562d55ee00c44a1ac6fd4ad092af1ee038c8 Mon Sep 17 00:00:00 2001 From: jwehner Date: Mon, 27 Jul 2020 16:01:31 +0200 Subject: [PATCH 12/18] updated comments --- .../Eigen/src/IterativeSolvers/BiCGSTABL.h | 38 ++--- .../Eigen/src/IterativeSolvers/IDRStab.h | 131 ++++++++---------- 2 files changed, 79 insertions(+), 90 deletions(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h b/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h index f90874368..923b78405 100755 --- a/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h +++ b/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h @@ -117,9 +117,9 @@ bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditio if (zeta < tol * zeta0) { /* - Convergence was achieved during BiCG step. - Without this check BiCGStab(L) fails for trivial matrices, such as when the preconditioner already is - the inverse, or the input matrix is identity. + Convergence was achieved during BiCG step. + Without this check BiCGStab(L) fails for trivial matrices, such as when the preconditioner already is + the inverse, or the input matrix is identity. */ bicg_convergence = true; break; @@ -128,11 +128,11 @@ bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditio if (bicg_convergence == false) { /* - The polynomial/minimize residual step. + The polynomial/minimize residual step. - QR Householder method for argmin is more stable than (modified) Gram-Schmidt, in the sense that there is - less loss of orthogonality. It is more accurate than solving the normal equations, since the normal equations - scale with condition number squared. + QR Householder method for argmin is more stable than (modified) Gram-Schmidt, in the sense that there is + less loss of orthogonality. It is more accurate than solving the normal equations, since the normal equations + scale with condition number squared. */ VectorType gamma = (rHat.rightCols(L)).householderQr().solve(rHat.col(0)); x += rHat.leftCols(L) * gamma; @@ -147,12 +147,12 @@ bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditio k++; /* - Reliable update part + Reliable update part - The recursively computed residual can deviate from the actual residual after several iterations. However, - computing the residual from the definition costs extra MVs and should not be done at each iteration. The reliable - update strategy computes the true residual from the definition: r=b-A*x at strategic intervals. Furthermore a - "group wise update" strategy is used to combine updates, which improves accuracy. + The recursively computed residual can deviate from the actual residual after several iterations. However, + computing the residual from the definition costs extra MVs and should not be done at each iteration. The reliable + update strategy computes the true residual from the definition: r=b-A*x at strategic intervals. Furthermore a + "group wise update" strategy is used to combine updates, which improves accuracy. */ Mx = (std::max)(Mx, zeta); // Maximum norm of residuals since last update of x. @@ -237,14 +237,14 @@ class BiCGSTABL : public IterativeSolverBase explicit BiCGSTABL(const EigenBase &A) : Base(A.derived()) { m_L = 2; diff --git a/unsupported/Eigen/src/IterativeSolvers/IDRStab.h b/unsupported/Eigen/src/IterativeSolvers/IDRStab.h index 59aecd44b..039342924 100755 --- a/unsupported/Eigen/src/IterativeSolvers/IDRStab.h +++ b/unsupported/Eigen/src/IterativeSolvers/IDRStab.h @@ -32,7 +32,7 @@ 5. Sonneveld, P. (2012). On the convergence behavior of IDR (s) and related methods. SIAM Journal on Scientific Computing, 34(5), A2576-A2598. - Right-preconditioning based on Ref. 3 is implemented here. + Right-preconditioning based on Ref. 3 is implemented here. */ #ifndef EIGEN_IDRSTAB_H @@ -46,7 +46,7 @@ template = N || L >= N) { /* - The matrix is very small, or the choice of L and S is very poor - in that case solving directly will be best. + The matrix is very small, or the choice of L and S is very poor + in that case solving directly will be best. */ /* - Exit + Exit */ lu_solver.compute(DenseMatrixTypeRow(mat)); x = lu_solver.solve(rhs); @@ -106,7 +103,7 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione VectorType update(N); /* - Main IDRStab algorithm + Main IDRStab algorithm */ // Set up the initial residual r.head(N) = rhs - mat * precond.solve(x); @@ -116,7 +113,7 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione h_FOM.setZero(); /* - Determine an initial U matrix of size N x S + Determine an initial U matrix of size N x S */ DenseMatrixTypeCol U(N * (L + 1), S); @@ -125,17 +122,9 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione // This construction can be combined with the Full Orthogonalization Method (FOM) from Ref.3 to provide a possible // early exit with no additional MV. if (col_index != 0) { - /* - Original Gram-Schmidt orthogonalization strategy from Ref. 1: - */ - // u.head(N) -= U.topLeftCorner(N, q) * (U.topLeftCorner(N, q).adjoint() * u.head(N)); /* - Modified Gram-Schmidt strategy: - Note that GS and MGS are mathematically equivalent, they are NOT numerically equivalent. - - Eventough h is a scalar, converting the dot product to Scalar is not supported: - http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1610 + Modified Gram-Schmidt strategy: */ VectorType w = mat * precond.solve(u.head(N)); for (Index i = 0; i < col_index; ++i) { @@ -153,23 +142,23 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione if (abs(h_FOM(col_index, col_index - 1)) != 0.0) { /* - This only happens if u is NOT exactly zero. In case it is exactly zero - it would imply that that this u has no component in the direction of the current residual. + This only happens if u is NOT exactly zero. In case it is exactly zero + it would imply that that this u has no component in the direction of the current residual. - By then setting u to zero it will not contribute any further (as it should). - Whereas attempting to normalize results in division by zero. + By then setting u to zero it will not contribute any further (as it should). + Whereas attempting to normalize results in division by zero. - Such cases occur if: - 1. The basis of dimension 1) { /* - Check for early FOM exit. + Check for early FOM exit. */ Scalar beta = r.head(N).norm(); VectorType e1 = VectorType::Zero(S - 1); @@ -197,7 +186,7 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione if (FOM_residual < tol2) { /* - Exit, the FOM algorithm was already accurate enough + Exit, the FOM algorithm was already accurate enough */ iters = k; x = precond.solve(x2); @@ -207,13 +196,13 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione } /* - Select an initial (N x S) matrix R0. - 1. Generate random R0, orthonormalize the result. - 2. This results in R0, however to save memory and compute we only need the adjoint of R0. This is given by the - matrix R_T.\ Additionally, the matrix (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the - anti-distributivity property of the adjoint. This results in AR_T, which is constant if R_T does not have to be - regenerated and can be precomputed. Based on reference 4, this has zero probability in exact arithmetic. However in - practice it does (extremely infrequently) occur, most notably for small matrices. + Select an initial (N x S) matrix R0. + 1. Generate random R0, orthonormalize the result. + 2. This results in R0, however to save memory and compute we only need the adjoint of R0. This is given by the + matrix R_T.\ Additionally, the matrix (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the + anti-distributivity property of the adjoint. This results in AR_T, which is constant if R_T does not have to be + regenerated and can be precomputed. Based on reference 4, this has zero probability in exact arithmetic. However in + practice it does (extremely infrequently) occur, most notably for small matrices. */ // PO: To save on memory consumption identity can be sparse // PO: can this be done with colPiv/fullPiv version as well? This would save 1 construction of a HouseholderQR object @@ -244,19 +233,19 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione const Index Nj_min_1 = N * (j - 1); /* - The IDR Step + The IDR Step */ // Construction of the sigma-matrix, and the decomposition of sigma. for (Index i = 0; i < S; ++i) { sigma.col(i).noalias() = AR_T * precond.solve(U.block(Nj_min_1, i, N, 1)); } /* - Suspected is that sigma could be badly scaled, since causes alpha~=0, but the - update vector is not zero. To improve stability we scale with absolute row and col sums first. - Sigma can become badly scaled (but still well-conditioned). - A bad sigma also happens if R_T is not chosen properly, for example if R_T is zeros sigma would be zeros - as well. The effect of this is a left-right preconditioner, instead of solving Ax=b, we solve - Q*A*P*inv(P)*x=Q*b. + Suspected is that sigma could be badly scaled, since causes alpha~=0, but the + update vector is not zero. To improve stability we scale with absolute row and col sums first. + Sigma can become badly scaled (but still well-conditioned). + A bad sigma also happens if R_T is not chosen properly, for example if R_T is zeros sigma would be zeros + as well. The effect of this is a left-right preconditioner, instead of solving Ax=b, we solve + Q*A*P*inv(P)*x=Q*b. */ Q = (sigma.cwiseAbs().rowwise().sum()).cwiseInverse(); // Calculate absolute inverse row sum @@ -327,8 +316,8 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione R_T = (qr.householderQ() * DenseMatrixTypeCol::Identity(N, S)) .transpose(); //.adjoint() vs .transpose() makes no difference, R_T is random anyways. /* - Additionally, the matrix (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the - anti-distributivity property of the adjoint. This results in AR_T, which can be precomputed. + Additionally, the matrix (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the + anti-distributivity property of the adjoint. This results in AR_T, which can be precomputed. */ AR_T = DenseMatrixTypeRow(R_T * mat); j = 0; // WARNING reset the for loop counter @@ -357,10 +346,10 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione // Orthonormalize u_j to the columns of V_j(:,1:q-1) if (q > 1) { /* - Modified Gram-Schmidt-like procedure to make u orthogonal to the columns of V from Ref. 1. + Modified Gram-Schmidt-like procedure to make u orthogonal to the columns of V from Ref. 1. - The vector mu from Ref. 1 is obtained implicitly: - mu=V.block(Nj, 0, N, q - 1).adjoint() * u.block(Nj, 0, N, 1). + The vector mu from Ref. 1 is obtained implicitly: + mu=V.block(Nj, 0, N, q - 1).adjoint() * u.block(Nj, 0, N, 1). */ for (Index i = 0; i <= q - 2; ++i) { @@ -379,9 +368,9 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione if (normalization_constant != 0.0) { /* - If u is exactly zero, this will lead to a NaN. Small, non-zero u is fine. In the case of NaN the - algorithm breaks down, eventhough it could have continued, since u zero implies that there is no further - update in a given direction. + If u is exactly zero, this will lead to a NaN. Small, non-zero u is fine. In the case of NaN the + algorithm breaks down, eventhough it could have continued, since u zero implies that there is no further + update in a given direction. */ u.head(Nj_plus_1) /= normalization_constant; } else { @@ -409,8 +398,8 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione tol_error = r.head(N).norm(); if (tol_error < tol2) { /* - Slightly early exit by moving the criterion before the update of U, - after the main while loop the result of that calculation would not be needed. + Slightly early exit by moving the criterion before the update of U, + after the main while loop the result of that calculation would not be needed. */ break; } @@ -458,9 +447,9 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione } /* - U=U0-sum(gamma_j*U_j) - Consider the first iteration. Then U only contains U0, so at the start of the while-loop - U should be U0. Therefore only the first N rows of U have to be updated. + U=U0-sum(gamma_j*U_j) + Consider the first iteration. Then U only contains U0, so at the start of the while-loop + U should be U0. Therefore only the first N rows of U have to be updated. */ for (Index i = 1; i <= L; ++i) { U.topRows(N) -= U.block(N * i, 0, N, S) * gamma(i - 1); @@ -515,15 +504,15 @@ class IDRStab : public IterativeSolverBase m_S = 4; } - /** Initialize the solver with matrix \a A for further \c Ax=b solving. + /** Initialize the solver with matrix \a A for further \c Ax=b solving. - This constructor is a shortcut for the default constructor followed - by a call to compute(). + This constructor is a shortcut for the default constructor followed + by a call to compute(). - \warning this class stores a reference to the matrix A as well as some - precomputed values that depend on it. Therefore, if \a A is changed - this class becomes invalid. Call compute() to update it with the new - matrix A, or modify a copy of A. + \warning this class stores a reference to the matrix A as well as some + precomputed values that depend on it. Therefore, if \a A is changed + this class becomes invalid. Call compute() to update it with the new + matrix A, or modify a copy of A. */ template explicit IDRStab(const EigenBase &A) : Base(A.derived()) { -- GitLab From dc4381e9bfb3ed24435bd72c9d22a579e361aa0d Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 27 Jul 2020 19:45:19 +0200 Subject: [PATCH 13/18] Removed a TODO notice that should have been removed in commit 4401fd4eb9ac5bc91d3e8281e2041ede939fa331 --- unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h | 1 - 1 file changed, 1 deletion(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h b/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h index 923b78405..c2b64bfff 100755 --- a/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h +++ b/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h @@ -107,7 +107,6 @@ bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditio uHat.col(j + 1) = precond.solve(mat * uHat.col(j)); alpha = rho1 / (rShadow.dot(uHat.col(j + 1))); // Update residuals - // TODO check this block rHat.leftCols(j + 1) -= alpha * uHat.block(0, 1, N, j + 1); rHat.col(j + 1) = precond.solve(mat * rHat.col(j)); // Complete BiCG iteration by updating x -- GitLab From 6ea49a4f293b0ced90d8af111832aceb7ea3f1cb Mon Sep 17 00:00:00 2001 From: Jens Wehner Date: Tue, 1 Sep 2020 09:41:34 +0000 Subject: [PATCH 14/18] Fixed typo IDRStab.h --- unsupported/Eigen/src/IterativeSolvers/IDRStab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/IDRStab.h b/unsupported/Eigen/src/IterativeSolvers/IDRStab.h index 039342924..f60a1d786 100755 --- a/unsupported/Eigen/src/IterativeSolvers/IDRStab.h +++ b/unsupported/Eigen/src/IterativeSolvers/IDRStab.h @@ -68,7 +68,7 @@ bool idrstab(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditione /* If b==0, then the exact solution is x=0. rhs_norm is needed for other calculations anyways, this exit is a freebie. - */d + */ x.setZero(); tol_error = 0.0; return true; -- GitLab From d261a2c0577fb0f33723127f7f02b160234b8c33 Mon Sep 17 00:00:00 2001 From: jwehner Date: Thu, 3 Sep 2020 14:03:27 +0200 Subject: [PATCH 15/18] activated more tests, which all fail --- .../Eigen/src/IterativeSolvers/BiCGSTABL.h | 16 ++------ unsupported/test/bicgstabl.cpp | 37 ++++++++++--------- unsupported/test/idrstab.cpp | 6 +-- 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h b/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h index c2b64bfff..134bb554d 100755 --- a/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h +++ b/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h @@ -59,8 +59,8 @@ bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditio // We start with an initial guess x_0 and let us set r_0 as (residual calculated from x_0) VectorType r0 = precond.solve(rhs - mat * x); // r_0 - // rShadow is arbritary, but must not be orthogonal to r0. - VectorType rShadow = r0; + // rShadow is arbitrary, but must not be orthogonal to r0. + VectorType& rShadow = r0; VectorType x_prime = x; x.setZero(); @@ -92,10 +92,8 @@ bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditio while (zeta > tol * zeta0 && k < maxIters) { rho0 *= -omega; - for (Index j = 0; j < L; ++j) { Scalar rho1 = rShadow.dot(rHat.col(j)); - if ((numext::isnan)(rho1) || rho0 == 0.0) { tol_error = zeta / zeta0; return false; @@ -154,8 +152,8 @@ bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditio "group wise update" strategy is used to combine updates, which improves accuracy. */ - Mx = (std::max)(Mx, zeta); // Maximum norm of residuals since last update of x. - Mr = (std::max)(Mr, zeta); // Maximum norm of residuals since last computation of the true residual. + Mx = std::max(Mx, zeta); // Maximum norm of residuals since last update of x. + Mr = std::max(Mr, zeta); // Maximum norm of residuals since last computation of the true residual. if (zeta < delta * zeta0 && zeta0 <= Mx) { update_app = true; @@ -177,7 +175,6 @@ bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditio rHat.col(0) = b_prime - precond.solve(mat * x); zeta = rHat.col(0).norm(); Mr = zeta; - if (update_app) { // After the group wise update, the original problem is translated to a shifted one. x_prime += x; @@ -186,16 +183,13 @@ bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditio Mx = zeta; } } - compute_res = false; update_app = false; } - // Convert internal variable to the true solution vector x x += x_prime; tol_error = zeta / zeta0; iters = k; - return true; } @@ -272,11 +266,9 @@ class BiCGSTABL : public IterativeSolverBase -template void test_BiCGSTABL_T() -{ +template +void test_BiCGSTABL_T() { + for (int L = 2; L <= 8; L *= 2) { + BiCGSTABL, DiagonalPreconditioner > BiCGSTABL_diag; + BiCGSTABL, IdentityPreconditioner> BiCGSTABL_I; + BiCGSTABL, IncompleteLUT > BiCGSTABL_ilut; - BiCGSTABL, DiagonalPreconditioner > BiCGSTABL_diag; - BiCGSTABL, IdentityPreconditioner> BiCGSTABL_I; - BiCGSTABL, IncompleteLUT > BiCGSTABL_ilut; + BiCGSTABL_diag.setTolerance(NumTraits::epsilon() * 4); + BiCGSTABL_I.setTolerance(NumTraits::epsilon() * 4); + BiCGSTABL_ilut.setTolerance(NumTraits::epsilon() * 4); + BiCGSTABL_diag.setL(L); + BiCGSTABL_I.setL(L); + BiCGSTABL_ilut.setL(L); - BiCGSTABL_diag.setTolerance(NumTraits::epsilon() * 4); - BiCGSTABL_I.setTolerance(NumTraits::epsilon() * 4); - BiCGSTABL_ilut.setTolerance(NumTraits::epsilon() * 4); - - CALL_SUBTEST( check_sparse_square_solving(BiCGSTABL_diag)); - CALL_SUBTEST( check_sparse_square_solving(BiCGSTABL_I)); - CALL_SUBTEST( check_sparse_square_solving(BiCGSTABL_ilut)); + CALL_SUBTEST(check_sparse_square_solving(BiCGSTABL_diag)); + CALL_SUBTEST(check_sparse_square_solving(BiCGSTABL_I)); + CALL_SUBTEST(check_sparse_square_solving(BiCGSTABL_ilut)); + } } -EIGEN_DECLARE_TEST(bicgstabl) -{ - CALL_SUBTEST_1((test_BiCGSTABL_T()) ); - CALL_SUBTEST_2((test_BiCGSTABL_T, int>())); - CALL_SUBTEST_3((test_BiCGSTABL_T())); +EIGEN_DECLARE_TEST(bicgstabl) { + CALL_SUBTEST_1((test_BiCGSTABL_T())); + CALL_SUBTEST_2((test_BiCGSTABL_T, int>())); + CALL_SUBTEST_3((test_BiCGSTABL_T())); } diff --git a/unsupported/test/idrstab.cpp b/unsupported/test/idrstab.cpp index f7883c1cb..21660b71c 100644 --- a/unsupported/test/idrstab.cpp +++ b/unsupported/test/idrstab.cpp @@ -12,11 +12,9 @@ template void test_idrstab_T() { - int L = 2; - int S = 4; - //for(int L=1; L<=8; L*=2) + for(int L=1; L<=8; L*=2) { - //for(int S=1; S<=8; S*=2) + for(int S=1; S<=8; S*=2) { IDRStab, DiagonalPreconditioner > idrstab_colmajor_diag; IDRStab, IdentityPreconditioner> idrstab_colmajor_I; -- GitLab From 158f3045c3b4a425614bccf32355e15630824d89 Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 26 Nov 2020 14:09:30 +0100 Subject: [PATCH 16/18] Improved reliability of bicgstabl --- .../Eigen/src/IterativeSolvers/BiCGSTABL.h | 551 ++++++++++-------- 1 file changed, 303 insertions(+), 248 deletions(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h b/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h index 134bb554d..63523aa05 100755 --- a/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h +++ b/unsupported/Eigen/src/IterativeSolvers/BiCGSTABL.h @@ -15,261 +15,316 @@ This implementation of BiCGStab(L) is based on the papers General algorithm: 1. G.L.G. Sleijpen, D.R. Fokkema. (1993). BiCGstab(l) for linear equations involving unsymmetric - matrices with complex spectrum. Electronic Transactions on Numerical Analysis. Polynomial step update: + matrices with complex spectrum. Electronic Transactions on Numerical Analysis. Polynomial step update: 2. G.L.G. Sleijpen, M.B. Van Gijzen. (2010) Exploiting BiCGstab(l) strategies to induce dimension - reduction SIAM Journal on Scientific Computing. + reduction SIAM Journal on Scientific Computing. 3. Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving linear systems of equations. - Universiteit Utrecht. Mathematisch Instituut, 1996 + Universiteit Utrecht. Mathematisch Instituut, 1996 */ #ifndef EIGEN_BICGSTABL_H #define EIGEN_BICGSTABL_H -namespace Eigen { - -namespace internal { -/** \internal Low-level bi conjugate gradient stabilized algorithm with L additional residual minimization steps - * \param mat The matrix A - * \param rhs The right hand side vector b - * \param x On input and initial solution, on output the computed solution. - * \param precond A preconditioner being able to efficiently solve for an - * approximation of Ax=b (regardless of b) - * \param iters On input the max number of iteration, on output the number of performed iterations. - * \param tol_error On input the tolerance error, on output an estimation of the relative error. - * \param L On input Number of additional GMRES steps to take. If L is too large (~20) instabilities occur. - * \return false in the case of numerical issue, for example a break down of BiCGSTABL. - */ -template -bool bicgstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditioner &precond, Index &iters, - typename Dest::RealScalar &tol_error, Index L) { - using numext::abs; - using numext::sqrt; - typedef typename Dest::RealScalar RealScalar; - typedef typename Dest::Scalar Scalar; - Index N = rhs.size(); - L = L < x.rows() ? L : x.rows(); - - Index k = 0; - - RealScalar tol = tol_error; - Index maxIters = iters; - - typedef Matrix VectorType; - typedef Matrix DenseMatrixType; - - // We start with an initial guess x_0 and let us set r_0 as (residual calculated from x_0) - VectorType r0 = precond.solve(rhs - mat * x); // r_0 - // rShadow is arbitrary, but must not be orthogonal to r0. - VectorType& rShadow = r0; - - VectorType x_prime = x; - x.setZero(); - VectorType b_prime = r0; - - // Other vectors and scalars initialization - Scalar rho0 = 1.0; - Scalar alpha = 0.0; - Scalar omega = 1.0; - - DenseMatrixType rHat(N, L + 1); - DenseMatrixType uHat(N, L + 1); - - rHat.col(0) = r0; - uHat.col(0).fill(0.0); - - bool bicg_convergence = false; - - RealScalar zeta0 = r0.norm(); - RealScalar zeta = zeta0; - RealScalar Mx = zeta0; - RealScalar Mr = zeta0; - - // Criterion for when to apply the group-wise update, conform ref 3. - const RealScalar delta = 0.01; - - bool compute_res = false; - bool update_app = false; - - while (zeta > tol * zeta0 && k < maxIters) { - rho0 *= -omega; - for (Index j = 0; j < L; ++j) { - Scalar rho1 = rShadow.dot(rHat.col(j)); - if ((numext::isnan)(rho1) || rho0 == 0.0) { - tol_error = zeta / zeta0; - return false; - } - Scalar beta = alpha * (rho1 / rho0); - rho0 = rho1; - // Update search directions - uHat.leftCols(j + 1) = rHat.leftCols(j + 1) - beta * uHat.leftCols(j + 1); - uHat.col(j + 1) = precond.solve(mat * uHat.col(j)); - alpha = rho1 / (rShadow.dot(uHat.col(j + 1))); - // Update residuals - rHat.leftCols(j + 1) -= alpha * uHat.block(0, 1, N, j + 1); - rHat.col(j + 1) = precond.solve(mat * rHat.col(j)); - // Complete BiCG iteration by updating x - x += alpha * uHat.col(0); - // Check for early exit - zeta = rHat.col(0).norm(); - - if (zeta < tol * zeta0) { - /* - Convergence was achieved during BiCG step. - Without this check BiCGStab(L) fails for trivial matrices, such as when the preconditioner already is - the inverse, or the input matrix is identity. - */ - bicg_convergence = true; - break; - } - } - - if (bicg_convergence == false) { - /* - The polynomial/minimize residual step. - - QR Householder method for argmin is more stable than (modified) Gram-Schmidt, in the sense that there is - less loss of orthogonality. It is more accurate than solving the normal equations, since the normal equations - scale with condition number squared. - */ - VectorType gamma = (rHat.rightCols(L)).householderQr().solve(rHat.col(0)); - x += rHat.leftCols(L) * gamma; - rHat.col(0) -= rHat.rightCols(L) * gamma; - uHat.col(0) -= uHat.rightCols(L) * gamma; - zeta = rHat.col(0).norm(); - omega = gamma(L - 1); - } - - // TODO: Duplicate update code can be removed for the L=1 and L!=1 case. - // TODO: Use analytical expression instead of householder for L=1. - k++; - - /* - Reliable update part - - The recursively computed residual can deviate from the actual residual after several iterations. However, - computing the residual from the definition costs extra MVs and should not be done at each iteration. The reliable - update strategy computes the true residual from the definition: r=b-A*x at strategic intervals. Furthermore a - "group wise update" strategy is used to combine updates, which improves accuracy. - */ - - Mx = std::max(Mx, zeta); // Maximum norm of residuals since last update of x. - Mr = std::max(Mr, zeta); // Maximum norm of residuals since last computation of the true residual. - - if (zeta < delta * zeta0 && zeta0 <= Mx) { - update_app = true; - } - - if (update_app || (zeta < delta * Mr && zeta0 <= Mr)) { - compute_res = true; - } - - if (bicg_convergence) { - update_app = true; - compute_res = true; - bicg_convergence = false; - } - - if (compute_res) { - // rHat.col(0) = b_prime - mat * x; //Fokkema paper pseudocode - // Fokkema paper Fortan code L250-254 - rHat.col(0) = b_prime - precond.solve(mat * x); - zeta = rHat.col(0).norm(); - Mr = zeta; - if (update_app) { - // After the group wise update, the original problem is translated to a shifted one. - x_prime += x; - x.setZero(); - b_prime = rHat.col(0); - Mx = zeta; - } - } - compute_res = false; - update_app = false; - } - // Convert internal variable to the true solution vector x - x += x_prime; - tol_error = zeta / zeta0; - iters = k; - return true; -} - -} // namespace internal - -template > -class BiCGSTABL; - -namespace internal { - -template -struct traits > { - typedef _MatrixType MatrixType; - typedef _Preconditioner Preconditioner; -}; - -} // namespace internal - -template -class BiCGSTABL : public IterativeSolverBase > { - typedef IterativeSolverBase Base; - using Base::m_error; - using Base::m_info; - using Base::m_isInitialized; - using Base::m_iterations; - using Base::matrix; - Index m_L; - - public: - typedef _MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::RealScalar RealScalar; - typedef _Preconditioner Preconditioner; - - public: - /** Default constructor. */ - BiCGSTABL() : Base() { m_L = 2; } - - /** Initialize the solver with matrix \a A for further \c Ax=b solving. - - This constructor is a shortcut for the default constructor followed - by a call to compute(). - - \warning this class stores a reference to the matrix A as well as some - precomputed values that depend on it. Therefore, if \a A is changed - this class becomes invalid. Call compute() to update it with the new - matrix A, or modify a copy of A. - */ - template - explicit BiCGSTABL(const EigenBase &A) : Base(A.derived()) { - m_L = 2; - } - - ~BiCGSTABL() {} - - /** \internal */ - /** Loops over the number of columns of b and does the following: - 1. sets the tolerence and maxIterations - 2. Calls the function that has the core solver routine - */ - template - void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const { - m_iterations = Base::maxIterations(); - m_error = Base::m_tolerance; - - bool ret = internal::bicgstabl(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_L); - - m_info = (!ret) ? NumericalIssue : m_error <= Base::m_tolerance ? Success : NoConvergence; - } - - /** \internal */ - /** Sets the parameter L, indicating the amount of minimize residual steps are used. */ - void setL(Index L) { - if (L < 1) { - L = 2; - } - m_L = L; - } - -}; +namespace Eigen +{ + + namespace internal + { + /** \internal Low-level bi conjugate gradient stabilized algorithm with L additional residual minimization steps + \param mat The matrix A + \param rhs The right hand side vector b + \param x On input and initial solution, on output the computed solution. + \param precond A preconditioner being able to efficiently solve for an + approximation of Ax=b (regardless of b) + \param iters On input the max number of iteration, on output the number of performed iterations. + \param tol_error On input the tolerance error, on output an estimation of the relative error. + \param L On input Number of additional GMRES steps to take. If L is too large (~20) instabilities occur. + \return false in the case of numerical issue, for example a break down of BiCGSTABL. + */ + template + bool bicgstabl(const MatrixType& mat, const Rhs& rhs, Dest& x, const Preconditioner& precond, + Index& iters, + typename Dest::RealScalar& tol_error, Index L) + { + using numext::abs; + using numext::sqrt; + typedef typename Dest::RealScalar RealScalar; + typedef typename Dest::Scalar Scalar; + Index N = rhs.size(); + L = L < x.rows() ? L : x.rows(); + + Index k = 0; + + RealScalar tol = tol_error; + Index maxIters = iters; + + typedef Matrix VectorType; + typedef Matrix DenseMatrixType; + + // We start with an initial guess x_0 and let us set r_0 as (residual calculated from x_0) + //VectorType r0 = precond.solve(rhs - mat * x); // r_0 + //VectorType r0 = rhs - mat * x; // r_0 + VectorType r0 = rhs - mat * precond.solve(x); // r_0 + // rShadow is arbritary, but must not be orthogonal to r0. + VectorType rShadow = r0; + + VectorType x_prime = x; + x.setZero(); + VectorType b_prime = r0; + + // Other vectors and scalars initialization + Scalar rho0 = 1.0; + Scalar alpha = 1.0; + Scalar omega = 1.0; + + DenseMatrixType rHat(N, L + 1); + DenseMatrixType uHat(N, L + 1); + + rHat.col(0) = r0; + uHat.col(0).fill(0.0); + + bool bicg_convergence = false; + + RealScalar zeta0 = rhs.norm(); + RealScalar zeta = zeta0; + RealScalar Mx = zeta0; + RealScalar Mr = zeta0; + + // Criterion for when to apply the group-wise update, conform ref 3. + const RealScalar delta = 0.01; + + bool compute_res = false; + bool update_app = false; + Index restarts = 0; + + while (zeta > tol * zeta0 && k < maxIters) + { + rho0 *= -omega; + + for (Index j = 0; j < L; ++j) + { + Scalar rho1 = rShadow.dot(rHat.col(j)); + + if ((numext::isnan)(rho1) || rho0 == 0.0) + { + tol_error = zeta / zeta0; + return false; + } + + // if (abs(rho1) < NumTraits::epsilon() * zeta0) + // { + // // The new residual vector became too orthogonal to the arbitrarily chosen direction r0 + // // Let's restart with a new r0: + // rHat.col(j) = rhs - mat * precond.solve(x); + // rShadow = rHat.col(j); + // rho1 = zeta0 = rHat.col(j).norm(); + + // if (restarts++ == 0) + // { + // k = 0; + // } + // } + + Scalar beta = alpha * (rho1 / rho0); + rho0 = rho1; + // Update search directions + uHat.leftCols(j + 1) = rHat.leftCols(j + 1) - beta * uHat.leftCols(j + 1); + //uHat.col(j + 1) = precond.solve(mat * uHat.col(j)); + uHat.col(j + 1) = mat * precond.solve(uHat.col(j)); + alpha = rho1 / (rShadow.dot(uHat.col(j + 1))); + // Update residuals + rHat.leftCols(j + 1) -= alpha * uHat.block(0, 1, N, j + 1); + //rHat.col(j + 1) = precond.solve(mat * rHat.col(j)); + rHat.col(j + 1) = mat * precond.solve(rHat.col(j)); + // Complete BiCG iteration by updating x + x += alpha * uHat.col(0); + // Check for early exit + zeta = rHat.col(0).norm(); + + if (zeta < tol * zeta0) + { + /* + Convergence was achieved during BiCG step. + Without this check BiCGStab(L) fails for trivial matrices, such as when the preconditioner already is + the inverse, or the input matrix is identity. + */ + bicg_convergence = true; + break; + } + } + + if (bicg_convergence == false) + { + /* + The polynomial/minimize residual step. + + QR Householder method for argmin is more stable than (modified) Gram-Schmidt, in the sense that there is + less loss of orthogonality. It is more accurate than solving the normal equations, since the normal equations + scale with condition number squared. + */ + VectorType gamma = (rHat.rightCols(L)).householderQr().solve(rHat.col(0)); + x += rHat.leftCols(L) * gamma; + rHat.col(0) -= rHat.rightCols(L) * gamma; + uHat.col(0) -= uHat.rightCols(L) * gamma; + zeta = rHat.col(0).norm(); + omega = gamma(L - 1); + } + + // TODO: Duplicate update code can be removed for the L=1 and L!=1 case. + // TODO: Use analytical expression instead of householder for L=1. + k++; + + /* + Reliable update part + + The recursively computed residual can deviate from the actual residual after several iterations. However, + computing the residual from the definition costs extra MVs and should not be done at each iteration. The reliable + update strategy computes the true residual from the definition: r=b-A*x at strategic intervals. Furthermore a + "group wise update" strategy is used to combine updates, which improves accuracy. + */ + + Mx = (std::max)(Mx, zeta); // Maximum norm of residuals since last update of x. + Mr = (std::max)(Mr, + zeta); // Maximum norm of residuals since last computation of the true residual. + + if (zeta < delta * zeta0 && zeta0 <= Mx) + { + update_app = true; + } + + if (update_app || (zeta < delta * Mr && zeta0 <= Mr)) + { + compute_res = true; + } + + if (bicg_convergence) + { + update_app = true; + compute_res = true; + bicg_convergence = false; + } + + if (compute_res) + { + //rHat.col(0) = b_prime - mat * x; //Fokkema paper pseudocode + // Fokkema paper Fortan code L250-254 + //rHat.col(0) = b_prime - precond.solve(mat * x); + rHat.col(0) = b_prime - mat * precond.solve(x); + zeta = rHat.col(0).norm(); + Mr = zeta; + + if (update_app) + { + // After the group wise update, the original problem is translated to a shifted one. + x_prime += x; + x.setZero(); + b_prime = rHat.col(0); + Mx = zeta; + } + } + + compute_res = false; + update_app = false; + } + + // Convert internal variable to the true solution vector x + x += x_prime; + tol_error = zeta / zeta0; + iters = k; + x = precond.solve(x); + + return true; + } + + } // namespace internal + + template > + class BiCGSTABL; + + namespace internal + { + + template + struct traits > + { + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; + }; + + } // namespace internal + + template + class BiCGSTABL : public IterativeSolverBase > + { + typedef IterativeSolverBase Base; + using Base::m_error; + using Base::m_info; + using Base::m_isInitialized; + using Base::m_iterations; + using Base::matrix; + Index m_L; + + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + + public: + /** Default constructor. */ + BiCGSTABL() : Base() + { + m_L = 2; + } + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + + This constructor is a shortcut for the default constructor followed + by a call to compute(). + + \warning this class stores a reference to the matrix A as well as some + precomputed values that depend on it. Therefore, if \a A is changed + this class becomes invalid. Call compute() to update it with the new + matrix A, or modify a copy of A. + */ + template + explicit BiCGSTABL(const EigenBase& A) : Base(A.derived()) + { + m_L = 2; + } + + ~BiCGSTABL() {} + + /** \internal */ + /** Loops over the number of columns of b and does the following: + 1. sets the tolerence and maxIterations + 2. Calls the function that has the core solver routine + */ + template + void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const + { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + + bool ret = internal::bicgstabl(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_L); + + m_info = (!ret) ? NumericalIssue : m_error <= Base::m_tolerance ? Success : NoConvergence; + } + + /** \internal */ + /** Sets the parameter L, indicating the amount of minimize residual steps are used. */ + void setL(Index L) + { + if (L < 1) + { + L = 2; + } + + m_L = L; + } + + protected: + }; } // namespace Eigen -- GitLab From 8aef08c6cb463f496ab800333f7913b733ccc4e2 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 28 Nov 2020 22:40:39 +0100 Subject: [PATCH 17/18] Added IDR(S), based on reference MATLAB implementation --- unsupported/Eigen/src/IterativeSolvers/IDRS.h | 372 ++++++++++++++++++ 1 file changed, 372 insertions(+) create mode 100755 unsupported/Eigen/src/IterativeSolvers/IDRS.h diff --git a/unsupported/Eigen/src/IterativeSolvers/IDRS.h b/unsupported/Eigen/src/IterativeSolvers/IDRS.h new file mode 100755 index 000000000..13391781f --- /dev/null +++ b/unsupported/Eigen/src/IterativeSolvers/IDRS.h @@ -0,0 +1,372 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2020 Chris Schoutrop +// Copyright (C) 2020 Jens Wehner +// Copyright (C) 2020 Jan van Dijk +// Copyright (C) 2020 Adithya Vijaykumar +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + This implementation of BiCGStab(L) is based on the papers + General algorithm: + 1. G.L.G. Sleijpen, D.R. Fokkema. (1993). BiCGstab(l) for linear equations involving unsymmetric + matrices with complex spectrum. Electronic Transactions on Numerical Analysis. Polynomial step update: + 2. G.L.G. Sleijpen, M.B. Van Gijzen. (2010) Exploiting BiCGstab(l) strategies to induce dimension + reduction SIAM Journal on Scientific Computing. + 3. Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving linear systems of equations. + Universiteit Utrecht. Mathematisch Instituut, 1996 +*/ + +#ifndef EIGEN_IDRS_H +#define EIGEN_IDRS_H + +namespace Eigen +{ + + namespace internal + { + /** \internal Low-level Induced Dimension Reduction algoritm + \param A The matrix A + \param b The right hand side vector b + \param x On input and initial solution, on output the computed solution. + \param precond A preconditioner being able to efficiently solve for an + approximation of Ax=b (regardless of b) + \param iter On input the max number of iteration, on output the number of performed iterations. + \param relres On input the tolerance error, on output an estimation of the relative error. + \param S On input Number of additional GMRES steps to take. If L is too large (~20) instabilities occur. + \return false in the case of numerical issue, for example a break down of IDRS. + */ + template + RealScalar omega(const Vector& t, const Vector& s, RealScalar angle) + { + RealScalar ns = s.norm(); + RealScalar nt = t.norm(); + RealScalar ts = t.adjoint() * s; + RealScalar rho = std::abs(ts / (nt * ns)); + RealScalar om = ts / (nt * nt); + + if (rho < angle) + { + om = om * angle / rho; + } + + return om; + } + + template + bool idrs(const MatrixType& A, const Rhs& b, Dest& x, const Preconditioner& precond, + Index& iter, + typename Dest::RealScalar& relres, Index S) + { + using numext::abs; + using numext::sqrt; + typedef typename Dest::RealScalar RealScalar; + typedef typename Dest::Scalar Scalar; + typedef Matrix VectorType; + typedef Matrix DenseMatrixType; + Index N = b.size(); + S = S < x.rows() ? S : x.rows(); + + RealScalar tol = relres; + Index maxit = iter; + + //Other parameters + bool smoothing = false; + RealScalar angle = 0.7; + bool replacement = false; + Index replacements = 0; + bool trueres = false; + + FullPivLU lu_solver; + + HouseholderQR qr(DenseMatrixType::Random(N, S)); + DenseMatrixType P = (qr.householderQ() * DenseMatrixType::Identity(N, S)); + + RealScalar normb = b.norm(); + + if (normb == 0.0) + { + //Solution is the zero vector + x.setZero(); + iter = 0; + relres = 0; + return true; + } + + RealScalar mp = 1e3 * NumTraits::epsilon(); + + //Compute initial residual + RealScalar tolb = tol * normb; //Relative tolerance + VectorType r = b - A * x; + + VectorType x_s, r_s; + + if (smoothing) + { + x_s = x; + r_s = r; + } + + RealScalar normr = r.norm(); + + if (normr <= tolb) + { + //Initial guess is a good enough solution + iter = 0; + relres = normr / normb; + return true; + } + + DenseMatrixType G = DenseMatrixType::Zero(N, S); + DenseMatrixType U = DenseMatrixType::Zero(N, S); + DenseMatrixType M = DenseMatrixType::Identity(S, S); + VectorType t(N), v(N); + RealScalar om = 1; + + //Main iteration loop, guild G-spaces: + iter = 0; + + while (normr > tolb && iter < maxit) + { + //New right hand size for small system: + VectorType f = (r.adjoint() * P).adjoint(); + + for (Index k = 1; k <= S; ++k) + { + //Solve small system and make v orthogonal to P: + //c = M(k:s,k:s)\f(k:s); + lu_solver.compute(M.block(k - 1, k - 1, S - k + 1, S - k + 1)); + VectorType c = lu_solver.solve(f.segment(k - 1, S - k + 1)); + //v = r - G(:,k:s)*c; + v = r - G.rightCols(S - k + 1) * c; + //Preconditioning + v = precond.solve(v); + + //Compute new U(:,k) and G(:,k), G(:,k) is in space G_j + U.col(k - 1) = U.rightCols(S - k + 1) * c + om * v; + G.col(k - 1) = A * U.col(k - 1); + + //Bi-Orthogonalise the new basis vectors: + for (Index i = 1; i <= k - 1; ++i) + { + //alpha = ( P(:,i)'*G(:,k) )/M(i,i); + //alpha is a Scalar, but the conversion is not allowed + VectorType alpha = (P.col(i - 1).adjoint() * G.col(k - 1)) / M(i - 1, i - 1); + G.col(k - 1) = G.col(k - 1) - alpha(0) * G.col(i - 1); + U.col(k - 1) = U.col(k - 1) - alpha(0) * U.col(i - 1); + } + + //New column of M = P'*G (first k-1 entries are zero) + //M(k:s,k) = (G(:,k)'*P(:,k:s))'; + M.block(k - 1, k - 1, S - k + 1, 1) = (G.col(k - 1).adjoint() * P.rightCols(S - k + 1)).adjoint(); + + if (M(k - 1, k - 1) == 0.0) + { + return false; + } + + //Make r orthogonal to q_i, i = 1..k + Scalar beta = f(k - 1) / M(k - 1, k - 1); + r = r - beta * G.col(k - 1); + x = x + beta * U.col(k - 1); + normr = r.norm(); + + if (replacement && normr > tolb / mp) + { + trueres = true; + } + + //Smoothing: + if (smoothing) + { + t = r_s - r; + //gamma is a Scalar, but the conversion is not allowed + VectorType gamma = (t.adjoint() * r_s) / (t.adjoint() * t); + r_s = r_s - gamma(0) * t; + x_s = x_s - gamma(0) * (x_s - x); + normr = r_s.norm(); + } + + iter = iter + 1; + + if (normr < tolb || iter == maxit) + { + break; + } + + //New f = P'*r (first k components are zero) + if (k < S) + { + f.segment(k + 1 - 1, S - (k + 1) + 1) = f.segment(k + 1 - 1, S - (k + 1) + 1) - beta * M.block(k + 1 - 1, k - 1, S - (k + 1) + 1, 1); + } + }//end for + + if (normr < tolb || iter == maxit) + { + break; + } + + //Now we have sufficient vectors in G_j to compute residual in G_j+1 + //Note: r is already perpendicular to P so v = r + //Preconditioning + v = r; + v = precond.solve(v); + + //Matrix-vector multiplication: + t = A * v; + + //Computation of a new omega + om = internal::omega(t, r, angle); + + if (om == 0.0) + { + return false; + } + + r = r - om * t; + x = x + om * v; + normr = r.norm(); + + if (replacement && normr > tolb / mp) + { + trueres = true; + } + + //Residual replacement? + if (trueres && normr < normb) + { + r = b - A * x; + trueres = false; + replacements = replacements + 1; + } + + //Smoothing: + if (smoothing) + { + t = r_s - r; + //gamma is a Scalar, but the conversion is not allowed + VectorType gamma = (t.adjoint() * r_s) / (t.adjoint() * t); + r_s = r_s - gamma(0) * t; + x_s = x_s - gamma(0) * (x_s - x); + normr = r_s.norm(); + } + + iter = iter + 1; + + }//end while + + if (smoothing) + { + x = x_s; + } + + relres = (b - A * x).norm() / normb; + + if (relres < tol) + { + return true; + } + else + { + return false; + } + } + + } // namespace internal + + template > + class IDRS; + + namespace internal + { + + template + struct traits > + { + typedef _MatrixType MatrixType; + typedef _Preconditioner Preconditioner; + }; + + } // namespace internal + + template + class IDRS : public IterativeSolverBase > + { + typedef IterativeSolverBase Base; + using Base::m_error; + using Base::m_info; + using Base::m_isInitialized; + using Base::m_iterations; + using Base::matrix; + Index m_S; + + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef _Preconditioner Preconditioner; + + public: + /** Default constructor. */ + IDRS() : Base() + { + m_S = 4; + } + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + + This constructor is a shortcut for the default constructor followed + by a call to compute(). + + \warning this class stores a reference to the matrix A as well as some + precomputed values that depend on it. Therefore, if \a A is changed + this class becomes invalid. Call compute() to update it with the new + matrix A, or modify a copy of A. + */ + template + explicit IDRS(const EigenBase& A) : Base(A.derived()) + { + m_S = 4; + } + + ~IDRS() {} + + /** \internal */ + /** Loops over the number of columns of b and does the following: + 1. sets the tolerence and maxIterations + 2. Calls the function that has the core solver routine + */ + template + void _solve_vector_with_guess_impl(const Rhs& b, Dest& x) const + { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + + bool ret = internal::idrs(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_S); + + m_info = (!ret) ? NumericalIssue : m_error <= Base::m_tolerance ? Success : NoConvergence; + } + + /** \internal */ + /** Sets the parameter S, indicating the dimension of the shadow space. */ + void setL(Index S) + { + if (S < 1) + { + S = 4; + } + + m_S = S; + } + + protected: + }; + +} // namespace Eigen + +#endif /* EIGEN_IDRS_H */ -- GitLab From 50747eb9dd4fb86336104700e4dd00433420c570 Mon Sep 17 00:00:00 2001 From: Chris Date: Sat, 28 Nov 2020 22:50:07 +0100 Subject: [PATCH 18/18] Updated IDR(S) documentation --- unsupported/Eigen/src/IterativeSolvers/IDRS.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/unsupported/Eigen/src/IterativeSolvers/IDRS.h b/unsupported/Eigen/src/IterativeSolvers/IDRS.h index 13391781f..1a3c6cf32 100755 --- a/unsupported/Eigen/src/IterativeSolvers/IDRS.h +++ b/unsupported/Eigen/src/IterativeSolvers/IDRS.h @@ -4,23 +4,11 @@ // Copyright (C) 2020 Chris Schoutrop // Copyright (C) 2020 Jens Wehner // Copyright (C) 2020 Jan van Dijk -// Copyright (C) 2020 Adithya Vijaykumar // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -/* - - This implementation of BiCGStab(L) is based on the papers - General algorithm: - 1. G.L.G. Sleijpen, D.R. Fokkema. (1993). BiCGstab(l) for linear equations involving unsymmetric - matrices with complex spectrum. Electronic Transactions on Numerical Analysis. Polynomial step update: - 2. G.L.G. Sleijpen, M.B. Van Gijzen. (2010) Exploiting BiCGstab(l) strategies to induce dimension - reduction SIAM Journal on Scientific Computing. - 3. Fokkema, Diederik R. Enhanced implementation of BiCGstab (l) for solving linear systems of equations. - Universiteit Utrecht. Mathematisch Instituut, 1996 -*/ #ifndef EIGEN_IDRS_H #define EIGEN_IDRS_H @@ -38,7 +26,7 @@ namespace Eigen approximation of Ax=b (regardless of b) \param iter On input the max number of iteration, on output the number of performed iterations. \param relres On input the tolerance error, on output an estimation of the relative error. - \param S On input Number of additional GMRES steps to take. If L is too large (~20) instabilities occur. + \param S On input Number of the dimension of the shadow space. \return false in the case of numerical issue, for example a break down of IDRS. */ template -- GitLab