diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index 679f4550..ca7c0bf0 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -24,16 +24,15 @@ jobs: - 'nightly' os: - ubuntu-latest - - macOS-latest - - windows-latest arch: - x64 + continue-on-error: true steps: - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: julia-actions/cache@v1 + - uses: julia-actions/cache@v2 - uses: julia-actions/julia-buildpkg@latest - uses: julia-actions/julia-runtest@latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 96c2f40a..d118bc86 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,8 @@ jobs: fail-fast: false matrix: version: - - '1.6' # LTS version + - '1.6' + - 'lts' - '1' # automatically expands to the latest stable 1.x release of Julia os: - ubuntu-latest @@ -31,11 +32,11 @@ jobs: - x64 steps: - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: julia-actions/cache@v1 + - uses: julia-actions/cache@v2 - uses: julia-actions/julia-buildpkg@latest - uses: julia-actions/julia-runtest@latest - uses: julia-actions/julia-processcoverage@v1 @@ -52,20 +53,19 @@ jobs: fail-fast: false matrix: version: - - '1.6' # LTS version + - 'lts' - '1' # automatically expands to the latest stable 1.x release of Julia os: - ubuntu-latest - - macOS-latest arch: - x64 steps: - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v1 + - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - uses: julia-actions/cache@v1 + - uses: julia-actions/cache@v2 - uses: julia-actions/julia-buildpkg@latest - uses: julia-actions/julia-runtest@latest env: diff --git a/Project.toml b/Project.toml index bda8f9e7..15df596d 100644 --- a/Project.toml +++ b/Project.toml @@ -1,38 +1,46 @@ name = "KrylovKit" uuid = "0b1a1467-8014-51b9-945f-bf0ae24f4b77" authors = ["Jutho Haegeman"] -version = "0.7.1" +version = "0.9.4" [deps] -ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +PackageExtensionCompat = "65ce6f38-6b18-4e1d-a461-8949797d7930" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" VectorInterface = "409d34a3-91d5-4945-b6ec-7529ddf182d8" +[weakdeps] +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + +[extensions] +KrylovKitChainRulesCoreExt = "ChainRulesCore" + [compat] Aqua = "0.6, 0.7, 0.8" ChainRulesCore = "1" ChainRulesTestUtils = "1" FiniteDifferences = "0.12" -GPUArraysCore = "0.1" -VectorInterface = "0.4" LinearAlgebra = "1" -Random = "1" +Logging = "1" +PackageExtensionCompat = "1" Printf = "1" +Random = "1" Test = "1" -TestExtras = "0.2" +TestExtras = "0.2,0.3" +VectorInterface = "0.5" Zygote = "0.6" julia = "1.6" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a" Zygote = 
"e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Test", "Aqua", "Random", "TestExtras", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] +test = ["Test", "Aqua", "Logging", "TestExtras", "ChainRulesTestUtils", "ChainRulesCore", "FiniteDifferences", "Zygote"] diff --git a/README.md b/README.md index 22ba6551..286628bc 100644 --- a/README.md +++ b/README.md @@ -31,36 +31,25 @@ to vectors. ## Release notes for the latest version -### v0.7 -This version now depends on and uses [VectorInterface.jl](https://github.com/Jutho/VectorInterface.jl) -to define the vector-like behavior of the input vectors, rather than some minimal set of -methods from `Base` and `LinearAlgebra`. The advantage is that many more types from standard -Julia are now supported out of the box, such as nested vectors or immutable objects such as -tuples. For custom user types for which the old set of required methods was implemented, there -are fallback definitions of the methods in VectorInferace.jl such that these types should still -be supported, but this might result in warnings being printed. It is recommend to implement full -support for at least the methods in VectorInterface without bang or with double bang, where the -latter set of methods can use in-place mutation if your type supports this behavior. - -In particular, tuples are now supported: - -```julia -julia> values, vectors, info = eigsolve(t -> cumsum(t) .+ 0.5 .* reverse(t), (1,0,0,0)); - -julia> values -4-element Vector{ComplexF64}: - 2.5298897746721303 + 0.0im - 0.7181879189193713 + 0.4653321688070444im - 0.7181879189193713 - 0.4653321688070444im - 0.03373438748912972 + 0.0im - -julia> vectors -4-element Vector{NTuple{4, ComplexF64}}: - (0.25302539267845964 + 0.0im, 0.322913174072047 + 0.0im, 0.48199234088257203 + 0.0im, 0.774201921982351 + 0.0im) - (0.08084058845575778 + 0.46550907490257704im, 0.16361072959559492 - 0.20526827902633993im, -0.06286027036719286 - 0.6630573167350086im, -0.47879640378455346 - 0.18713670961291684im) - (0.08084058845575778 - 0.46550907490257704im, 0.16361072959559492 + 0.20526827902633993im, -0.06286027036719286 + 0.6630573167350086im, -0.47879640378455346 + 0.18713670961291684im) - (0.22573986355213632 + 0.0im, -0.5730667760748933 + 0.0im, 0.655989711683001 + 0.0im, -0.4362493350466509 + 0.0im) -``` +### v0.9 +KrylovKit v0.9 adds two new sets of functionality: +* The function `lssolve` can be used to solve linear least squares problems, i.e. problems of the form `x = argmin(norm(A*x - b))` + for a given linear map `A` and vector `b`. Currently, only one algorithm is implemented, namely the LSMR algorithm + of Fong and Saunders. +* There are now new functions `reallinsolve` and `realeigsolve` (and `reallssolve` since v0.9.4), which are useful when + using vectors with complex arithmetic, but where the linear map (implemented as a function `f`) acts as a real linear map, + meaning that it only satisfies `f(α*x) = α*f(x)` when `α` is a real number. This occurs for example when computing the + Jacobian of a complex function that is not holomorphic, e.g. in the context of automatic differentation. This is implemented + by simply wrapping the vector as `RealVec`, which is a specific `InnerProductVec` type where the redefined inner product + forgets about the imaginary part of the original `inner` function, thereby effectively treating the vector as living in a + real vector space. 
Furthermore, in this setting, only real linear combinations of vectors are allowed, so that for the + case of `eigsolve`, only real eigenvalues and eigenvectors are computed. An error will be thrown if the requested list + of eigenvalues contains complex eigenvalues. + +In addition, the following is technically a breaking change: +* The verbosity system, the different verbosity levels and the output formatting have been redesigned (both in the primal methods + and the rrules). The default verbosity level is now 1, which means that warnings will be printed by default, but all other output + (info messages) is suppressed. Before, the default verbosity was such that all output (including warnings) was suppressed. ## Overview KrylovKit.jl accepts general functions or callable objects as linear maps, and general Julia @@ -68,6 +57,7 @@ objects with vector like behavior (as defined in the docs) as vectors. The high level interface of KrylovKit is provided by the following functions: * `linsolve`: solve linear systems +* `lssolve`: solve least squares problems * `eigsolve`: find a few eigenvalues and corresponding eigenvectors * `geneigsolve`: find a few generalized eigenvalues and corresponding vectors * `svdsolve`: find a few singular values and corresponding left and right singular vectors @@ -94,7 +84,8 @@ julia> import Pkg; Pkg.add("KrylovKit.jl") ## Project Status -The package is tested against Julia `1.0`, the current stable and the nightly builds of the Julia `master` branch on Linux, macOS, and Windows, 32- and 64-bit architecture and with `1` and `4` threads. +The package is tested against Julia `1.6`, the long-term stable release (1.10), and the current stable release, as well +as against nightly builds of the Julia `master` branch, on Linux, macOS, and Windows (64-bit), with `1` and `4` threads. ## Questions and Contributions diff --git a/docs/Project.toml b/docs/Project.toml index b9b77958..1814eb33 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -2,4 +2,4 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" [compat] -Documenter = "0.25 - 0.27" +Documenter = "1" diff --git a/docs/make.jl b/docs/make.jl index 2bcfe460..682fe453 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -3,13 +3,15 @@ using KrylovKit makedocs(; modules=[KrylovKit], sitename="KrylovKit.jl", - authors="Jutho Haegeman", + authors="Jutho Haegeman and collaborators", pages=["Home" => "index.md", "Manual" => ["man/intro.md", "man/linear.md", + "man/leastsquares.md", "man/eig.md", "man/svd.md", "man/matfun.md", + "man/reallinear.md", "man/algorithms.md", "man/implementation.md"]], format=Documenter.HTML(; prettyurls=get(ENV, "CI", nothing) == "true")) diff --git a/docs/src/index.md b/docs/src/index.md index bc733ccb..c9e89734 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -10,15 +10,21 @@ objects with vector like behavior (see below) as vectors. The high level interface of KrylovKit is provided by the following functions: * [`linsolve`](@ref): solve linear systems `A*x = b` +* [`lssolve`](@ref): solve least squares problems `A*x ≈ b` * [`eigsolve`](@ref): find a few eigenvalues and corresponding eigenvectors of an eigenvalue problem `A*x = λ x` * [`geneigsolve`](@ref): find a few eigenvalues and corresponding vectors of a generalized eigenvalue problem `A*x = λ*B*x` * [`svdsolve`](@ref): find a few singular values and corresponding left and right - singular vectors `A*x = σ * y` and `A'*y = σ*x`.
-* [`exponentiate`](@ref): apply the exponential of a linear map to a vector -* [`expintegrator`](@ref): exponential integrator for a linear non-homogeneous ODE, - generalization of `exponentiate` + singular vectors `A*x = σ * y` and `A'*y = σ*x` +* [`exponentiate`](@ref): apply the exponential of a linear map to a vector `x=exp(t*A)*x₀` +* [`expintegrator`](@ref): exponential integrator for a linear non-homogeneous ODE + (generalization of `exponentiate`) + +Furthermore, for specialised use cases, there are functions that can deal with so-called +"real linear maps", which arise e.g. in the context of differentiable programming: +* [`reallinsolve`](@ref), [`reallssolve`](@ref) and [`realeigsolve`](@ref) + ## Package features and alternatives This section could also be titled "Why did I create KrylovKit.jl"? There are already a fair number of packages with Krylov-based or other iterative square problems, eigenvalue and singular value problems * [Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl): part of the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organisation, solves - linear systems and least square problems, specific for linear operators from - [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl). + linear systems and least squares problems on CPU or GPU for any data type that supports `mul!()`, + including dense and sparse matrices, and abstract operators such as those defined from + [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl) or + [LinearMaps.jl](https://github.com/JuliaLinearAlgebra/LinearMaps.jl). * [KrylovMethods.jl](https://github.com/lruthotto/KrylovMethods.jl): specific for sparse matrices * [Expokit.jl](https://github.com/acroy/Expokit.jl): application of the matrix exponential @@ -47,7 +55,7 @@ There are already a fair number of packages with Krylov-based or other iterative contains implementations of [high order exponential integrators](https://docs.juliadiffeq.org/latest/solvers/split_ode_solve/#OrdinaryDiffEq.jl-2) with adaptive Krylov-subspace calculations for solving semilinear and nonlinear ODEs. -These packages have certainly inspired and influenced the development of KrylovKit.jl. +Some of these packages have certainly inspired and influenced the development of KrylovKit.jl. However, KrylovKit.jl distinguishes itself from the previous packages in the following ways: 1. KrylovKit accepts general functions to represent the linear map or operator that defines @@ -61,20 +69,21 @@ However, KrylovKit.jl distinguishes itself from the previous packages in the fol 2. KrylovKit does not assume that the vectors involved in the problem are actual subtypes of `AbstractVector`. Any Julia object that behaves as a vector is supported, so in particular higher-dimensional arrays or any custom user type that supports the - interface as defined in - [`VectorInterface.jl`](https://github.com/Jutho/VectorInterface.jl) - - Algorithms in KrylovKit.jl are tested against such a minimal implementation (named - `MinimalVec`) in the test suite. This type is only defined in the tests. However, - KrylovKit provides two types implementing this interface and slightly more, to make - them behave more like `AbstractArrays` (e.g. also `Base.:+` etc), which can facilitate - certain applications: - * [`RecursiveVec`](@ref) can be used for grouping a set of vectors into a single - vector like structure (can be used recursively). This is more robust than trying to - use nested `Vector{<:Vector}` types.
- * [`InnerProductVec`](@ref) can be used to redefine the inner product (i.e. `inner`) - and corresponding norm (`norm`) of an already existing vector like object. The - latter should help with implementing certain type of preconditioners. + interface as defined in [`VectorInterface.jl`](https://github.com/Jutho/VectorInterface.jl). + Aside from arrays filled with scalar entries, this includes tuples, named tuples, and + arbitrarily nested combinations of tuples and arrays. Furthermore, `CuArray` objects + are fully supported as vectors, so that the application of the linear operator on the + vector can be executed on a GPU. The computations performed within the Krylov subspace, + such as diagonalising the projected matrix, are however always performed on the CPU. + +3. Since version 0.8, KrylovKit.jl supports reverse-mode AD by providing `ChainRulesCore.rrule` + definitions for the most common functionality (`linsolve`, `eigsolve`, `svdsolve`). + Hence, reverse mode AD engines that are compatible with the [ChainRules](https://juliadiff.org/ChainRulesCore.jl/dev/) + ecosystem will be able to benefit from an optimized implementation of the adjoint + of these functions. The `rrule` definitions for the remaining functionality + (`geneigsolve` and `expintegrator`, of which `exponentiate` is a special case) will be + added at a later stage. There is a dedicated documentation page on how to configure these + `rrule`s, as they also require solving large-scale linear or eigenvalue problems. ## Current functionality @@ -94,8 +103,8 @@ The following algorithms are currently implemented 2, it becomes equivalent to the latter. * `svdsolve`: finding largest singular values based on Golub-Kahan-Lanczos bidiagonalization (see [`GKL`](@ref)) -* `exponentiate`: a [`Lanczos`](@ref) based algorithm for the action of the exponential of - a real symmetric or complex hermitian linear map. +* `exponentiate`: a [`Lanczos`](@ref) or [`Arnoldi`](@ref) based algorithm for the action + of the exponential of a linear map. * `expintegrator`: [exponential integrator](https://en.wikipedia.org/wiki/Exponential_integrator) for a linear non-homogeneous ODE, computes a linear combination of the `ϕⱼ` functions which generalize `ϕ₀(z) = exp(z)`. @@ -104,13 +113,13 @@ Here follows a wish list / to-do list for the future. Any help is welcomed and appreciated. * More algorithms, including biorthogonal methods: - - for `linsolve`: MINRES, BiCG, IDR(s), ... + - for `linsolve`: L-GMRES, MINRES, BiCG, IDR(s), ... + - for `lssolve`: LSQR, ... - for `eigsolve`: BiLanczos, Jacobi-Davidson JDQR/JDQZ, subspace iteration (?), ... - for `geneigsolve`: trace minimization, ...
* Support both in-place / mutating and out-of-place functions as linear maps * Reuse memory for storing vectors when restarting algorithms (related to previous) * Support non-BLAS scalar types using GeneralLinearAlgebra.jl and GeneralSchur.jl -* Least square problems * Nonlinear eigenvalue problems * Preconditioners * Refined Ritz vectors, Harmonic Ritz values and vectors diff --git a/docs/src/man/algorithms.md b/docs/src/man/algorithms.md index 86b41a13..d6c3cd4f 100644 --- a/docs/src/man/algorithms.md +++ b/docs/src/man/algorithms.md @@ -24,6 +24,7 @@ KrylovKit.MINRES GMRES KrylovKit.BiCG BiCGStab +LSMR ``` ## Specific algorithms for generalized eigenvalue problems ```@docs diff --git a/docs/src/man/eig.md b/docs/src/man/eig.md index a4187922..bedceb3e 100644 --- a/docs/src/man/eig.md +++ b/docs/src/man/eig.md @@ -39,6 +39,43 @@ T, vecs, vals, info = schursolve(A, x₀, 1, :LM, Arnoldi(...)) and use `vecs[1]` as the real valued eigenvector (after checking `info.converged`) corresponding to the largest magnitude eigenvalue of `A`. +More generally, if you want to compute several eigenvalues of a real linear map, and you know +that all of them are real, so that also the associated eigenvectors will be real, then you +can use the [`realeigsolve`](@ref) method. + +## Automatic differentiation + +The `eigsolve` (and `realeigsolve`) routine can be used in conjunction with reverse-mode automatic +differentiation, using AD engines that are compatible with the [ChainRules](https://juliadiff.org/ChainRulesCore.jl/dev/) +ecosystem. The adjoint problem of an eigenvalue problem is a linear problem, although it can also +be formulated as an eigenvalue problem. Details about this approach will be published in a +forthcoming manuscript. + +In either case, the adjoint problem requires the adjoint[^1] of the linear map. If the linear map is +an `AbstractMatrix` instance, its `adjoint` will be used in the `rrule`. If the linear map is implemented +as a function `f`, then the AD engine itself is used to compute the corresponding adjoint via +`ChainRulesCore.rrule_via_ad(config, f, x)`. The specific base point `x` at which this adjoint is +computed should not affect the result if `f` properly represents a linear map. Furthermore, the linear +map is the only argument that affects the `eigsolve` output (from a theoretical perspective, the +starting vector and algorithm parameters should have no effect), so that this is where the adjoint +variables need to be propagated to and have a nonzero effect. + +The adjoint problem (also referred to as cotangent problem) can thus be solved as a linear problem +or as an eigenvalue problem. Note that this eigenvalue problem is never symmetric or Hermitian, +even if the primal problem is. The different implementations of the `rrule` can be selected using +the `alg_rrule` keyword argument. If a linear solver such as `GMRES` or `BiCGStab` is specified, +the adjoint problem requires solving a number of linear problems equal to the number of requested +eigenvalues and eigenvectors. If an eigenvalue solver is specified, for which `Arnoldi` is essentially +the only option, then the adjoint problem is solved as a single (but larger) eigenvalue problem. + +Note that the phase of an eigenvector is not uniquely determined. Hence, a well-defined cost function +constructed from eigenvectors should depend on these in such a way that its value is not affected +by changing the phase of those eigenvectors, i.e. the cost function should be 'gauge invariant'.
+If this is not the case, the cost function is said to be 'gauge dependent', and this can be detected +in the resulting adjoint variables for those eigenvectors. The KrylovKit `rrule` for `eigsolve` +will print a warning if it detects from the incoming adjoint variables that the cost function is gauge +dependent. This warning can be suppressed by passing `alg_rrule` an algorithm with `verbosity=-1`. + ## Generalized eigenvalue problems Generalized eigenvalues `λ` and corresponding vectors `x` of the generalized eigenvalue @@ -54,3 +91,8 @@ properties explicitly. ```@docs geneigsolve ``` + +Currently, there is no `rrule` and thus no automatic differentiation support for `geneigsolve`. + +[^1]: For a linear map, the adjoint or pullback required in the reverse-order chain rule coincides +with its (conjugate) transpose, at least with respect to the standard Euclidean inner product. diff --git a/docs/src/man/implementation.md b/docs/src/man/implementation.md index f34c8d47..70be79c0 100644 --- a/docs/src/man/implementation.md +++ b/docs/src/man/implementation.md @@ -14,11 +14,16 @@ KrylovKit.OrthonormalBasis ``` We can orthogonalize or orthonormalize a given vector to another vector (assumed normalized) -or to a given [`KrylovKit.OrthonormalBasis`](@ref). +or to a given [`KrylovKit.OrthonormalBasis`](@ref) using ```@docs KrylovKit.orthogonalize KrylovKit.orthonormalize ``` +or using the possibly in-place versions +```@docs +KrylovKit.orthogonalize!! +KrylovKit.orthonormalize!! +``` The expansion coefficients of a general vector in terms of a given orthonormal basis can be obtained as ```@docs diff --git a/docs/src/man/intro.md b/docs/src/man/intro.md index 1f166856..b60bb8c1 100644 --- a/docs/src/man/intro.md +++ b/docs/src/man/intro.md @@ -76,9 +76,9 @@ results..., info = problemsolver(A, args..., algorithm(; kwargs...)) Most `algorithm` constructions take the same keyword arguments (`tol`, `krylovdim`, `maxiter` and `verbosity`) discussed above. -As mentioned before, there are two auxiliary structs that can be used to define new vectors, -namely +While KrylovKit.jl currently does not provide a general interface for including +preconditioners, it is possible to, e.g., use a modified inner product. KrylovKit.jl provides +a specific type for this purpose: ```@docs -RecursiveVec InnerProductVec ``` diff --git a/docs/src/man/leastsquares.md b/docs/src/man/leastsquares.md new file mode 100644 index 00000000..05db21dd --- /dev/null +++ b/docs/src/man/leastsquares.md @@ -0,0 +1,10 @@ +# Least squares problems + +Least squares problems take the form of finding `x` that minimises `norm(b - A*x)`, where +`A` should be a linear map. As opposed to linear systems, the input and output of the linear +map do not need to be the same, so that `x` (input) and `b` (output) can live in different +vector spaces. Such problems can be solved using the function `lssolve`: + +```@docs +lssolve +``` diff --git a/docs/src/man/linear.md b/docs/src/man/linear.md index ceecfecc..8ec4ce03 100644 --- a/docs/src/man/linear.md +++ b/docs/src/man/linear.md @@ -1,5 +1,7 @@ # Linear problems +## Linear systems + Linear systems are of the form `A*x=b` where `A` should be a linear map that has the same type of output as input, i.e. the solution `x` should be of the same type as the right hand side `b`.
They can be solved using the function `linsolve`: ```@docs linsolve ``` + +## Automatic differentiation + +The `linsolve` routine can be used in conjunction with reverse-mode automatic differentiation, +using AD engines that are compatible with the [ChainRules](https://juliadiff.org/ChainRulesCore.jl/dev/) +ecosystem. The adjoint problem of a linear problem is again a linear problem, which requires the +adjoint[^1] of the linear map. If the linear map is an `AbstractMatrix` instance, its `adjoint` +will be used in the `rrule`. If the linear map is implemented as a function `f`, then the AD engine +itself is used to compute the corresponding adjoint via `ChainRulesCore.rrule_via_ad(config, f, x)`. +The specific base point `x` at which this adjoint is computed should not affect the result if `f` +properly represents a linear map. Furthermore, the `linsolve` output is only affected by the linear +map argument and the right hand side argument `b` (from a theoretical perspective, the starting vector +and algorithm parameters should have no effect), so that these two arguments are where the adjoint +variables need to be propagated to and have a nonzero effect. + +The adjoint linear problem (also referred to as cotangent problem) is by default solved using the +same algorithms as the primal problem. However, the `rrule` can be customized to use a different +Krylov algorithm, by specifying the `alg_rrule` keyword argument. Its value can be any of the values +accepted for the `algorithm` argument in `linsolve`. + +[^1]: For a linear map, the adjoint or pullback required in the reverse-order chain rule coincides +with its (conjugate) transpose, at least with respect to the standard Euclidean inner product. \ No newline at end of file diff --git a/docs/src/man/matfun.md b/docs/src/man/matfun.md index 7a1153a5..fcb62176 100644 --- a/docs/src/man/matfun.md +++ b/docs/src/man/matfun.md @@ -1,5 +1,5 @@ -# Functions of matrices and linear maps -Applying a function of a matrix or linear map to a given vector can in some cases also be +# Functions of matrices and linear operators +Applying a function of a matrix or linear operator to a given vector can in some cases also be computed using Krylov methods. One example is the inverse function, which exactly corresponds to what `linsolve` computes: ``A^{-1} * b``. There are other functions ``f`` for which ``f(A) * b`` can be computed using Krylov techniques, i.e. where ``f(A) * b`` can diff --git a/docs/src/man/reallinear.md b/docs/src/man/reallinear.md new file mode 100644 index 00000000..b2791f92 --- /dev/null +++ b/docs/src/man/reallinear.md @@ -0,0 +1,49 @@ +# Real linear maps + +A map $$f: V \to V$$ from some vector space $$V$$ to itself is said to be a real linear map if +it satisfies $$f(\alpha x + \beta y) = \alpha f(x) + \beta f(y)$$ for all $$x, y \in V$$ and +all $$\alpha, \beta \in \mathbb{R}$$. When $$V$$ is itself a real vector space, this is just +the natural concept of a linear map. However, this definition can be used even if $$x$$ and +$$y$$ are naturally represented using complex numbers and arithmetic and also admit complex linear +combinations, i.e. if $$V$$ is a complex vector space. + +Such real linear maps arise whenever `f(x)` involves calling `conj(x)`, and are for example +obtained in the context of Jacobians (pullbacks) of complex valued functions that are not +holomorphic.
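+As a minimal illustration (the matrices `A` and `B` below are hypothetical examples, not part +of the KrylovKit.jl API), such a map could look like + +```julia +using LinearAlgebra +A = randn(ComplexF64, 4, 4); B = randn(ComplexF64, 4, 4) +f(x) = A * x + B * conj(x)  # the call to `conj` makes `f` only real linear +x = randn(ComplexF64, 4) +f(2 * x) ≈ 2 * f(x)   # true: `f` is compatible with real scalars +f(im * x) ≈ im * f(x) # false (for nonzero `B`): `f` is not complex linear +```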
+ +To deal with real linear maps, one should reinterpret $$V$$ as a real vector space, by +restricting the possible linear combinations to those with real scalar coefficients, and by +using the real part of the inner product. When the vectors are explicitly represented as +some `AbstractVector{Complex{T}}`, this could be obtained by explicitly splitting +them into their real and imaginary parts and stacking those into `AbstractVector{T}` objects +with twice the original length. + +However, KrylovKit.jl admits a different approach, where the original representation of +vectors is kept, and the inner product is simply replaced by its real part. KrylovKit.jl +offers specific methods for solving linear systems and eigenvalue systems in this way. For +linear problems and least squares problems, this is implemented using `reallinsolve` and +`reallssolve`: + +```@docs +reallinsolve +reallssolve +``` + +In the case of eigenvalue systems, a similar method `realeigsolve` is available. In this +context, only real eigenvalues are meaningful, as the corresponding eigenvectors should be +built from real linear combinations of the vectors that span the (real) Krylov subspace. +This approach can also be applied to linear maps on vectors that were naturally real to +begin with, if it is guaranteed that the targeted eigenvalues are real. In that case, the +associated eigenvectors will also be returned using only real arithmetic. This is in contrast +with `eigsolve`, which will always turn to complex arithmetic if the linear map is real but +not symmetric. An error will be thrown if complex eigenvalues are encountered within the +targeted set. + +```@docs +realeigsolve +``` + +Note that `reallinsolve`, `reallssolve` and `realeigsolve` currently only exist with the +"expert" mode interface, where the user has to manually specify the underlying Krylov +algorithm and its parameters, i.e. `GMRES` or `BiCGStab` for `reallinsolve`, `LSMR` for +`reallssolve` and `Arnoldi` for `realeigsolve`. \ No newline at end of file diff --git a/docs/src/man/svd.md b/docs/src/man/svd.md index cf55eb65..f8872352 100644 --- a/docs/src/man/svd.md +++ b/docs/src/man/svd.md @@ -1,7 +1,43 @@ # Singular value problems + +## Singular values and singular vectors It is possible to iteratively compute a few singular values and corresponding left and right singular vectors using the function `svdsolve`: ```@docs svdsolve ``` + +## Automatic differentiation + +The `svdsolve` routine can be used in conjunction with reverse-mode automatic differentiation, +using AD engines that are compatible with the [ChainRules](https://juliadiff.org/ChainRulesCore.jl/dev/) +ecosystem. The adjoint problem of a singular value problem contains a linear problem, although it +can also be formulated as an eigenvalue problem. Details about this approach will be published in a +forthcoming manuscript. + +Both `svdsolve` and the adjoint problem associated with it require the action of the linear map as +well as of its adjoint[^1]. Hence, no new information about the linear map is required for the adjoint +problem. However, the linear map is the only argument that affects the `svdsolve` output (from a +theoretical perspective, the starting vector and algorithm parameters should have no effect), so that +this is where the adjoint variables need to be propagated to. + +The adjoint problem (also referred to as cotangent problem) can thus be solved as a linear problem +or as an eigenvalue problem. Note that this eigenvalue problem is never symmetric or Hermitian.
+The different implementations of the `rrule` can be selected using the `alg_rrule` keyword argument. +If a linear solver such as `GMRES` or `BiCGStab` is specified, the adjoint problem requires solving a +number of linear problems equal to the number of requested singular values and vectors. If an +eigenvalue solver is specified, for which `Arnoldi` is essentially the only option, then the adjoint +problem is solved as a single (but larger) eigenvalue problem. + +Note that the common pair of left and right singular vectors has an arbitrary phase freedom. +Hence, a well-defined cost function constructed from singular vectors should depend on these in such a way +that its value is not affected by simultaneously changing the left and right singular vector with +a common phase factor, i.e. the cost function should be 'gauge invariant'. If this is not the case, +the cost function is said to be 'gauge dependent', and this can be detected in the resulting adjoint +variables for those singular vectors. The KrylovKit `rrule` for `svdsolve` will print a warning if +it detects from the incoming adjoint variables that the cost function is gauge dependent. This +warning can be suppressed by passing `alg_rrule` an algorithm with `verbosity=-1`. + +[^1]: For a linear map, the adjoint or pullback required in the reverse-order chain rule coincides +with its (conjugate) transpose, at least with respect to the standard Euclidean inner product. diff --git a/ext/KrylovKitChainRulesCoreExt/KrylovKitChainRulesCoreExt.jl b/ext/KrylovKitChainRulesCoreExt/KrylovKitChainRulesCoreExt.jl new file mode 100644 index 00000000..68677480 --- /dev/null +++ b/ext/KrylovKitChainRulesCoreExt/KrylovKitChainRulesCoreExt.jl @@ -0,0 +1,16 @@ +module KrylovKitChainRulesCoreExt + +using KrylovKit +using ChainRulesCore +using LinearAlgebra +using VectorInterface + +using KrylovKit: apply_normal, apply_adjoint +using KrylovKit: WARN_LEVEL, STARTSTOP_LEVEL, EACHITERATION_LEVEL + +include("utilities.jl") +include("linsolve.jl") +include("eigsolve.jl") +include("svdsolve.jl") + +end # module diff --git a/ext/KrylovKitChainRulesCoreExt/eigsolve.jl b/ext/KrylovKitChainRulesCoreExt/eigsolve.jl new file mode 100644 index 00000000..28ac92e2 --- /dev/null +++ b/ext/KrylovKitChainRulesCoreExt/eigsolve.jl @@ -0,0 +1,437 @@ +function ChainRulesCore.rrule(config::RuleConfig, + ::typeof(eigsolve), + f, + x₀, + howmany, + which, + alg_primal; + alg_rrule=Arnoldi(; tol=alg_primal.tol, + krylovdim=alg_primal.krylovdim, + maxiter=alg_primal.maxiter, + eager=alg_primal.eager, + orth=alg_primal.orth, + verbosity=alg_primal.verbosity)) + (vals, vecs, info) = eigsolve(f, x₀, howmany, which, alg_primal) + if alg_primal isa Lanczos + fᴴ = f + elseif f isa AbstractMatrix + fᴴ = adjoint(f) + else + fᴴ = let pb = rrule_via_ad(config, f, zerovector(x₀, complex(scalartype(x₀))))[2] + v -> unthunk(pb(v)[2]) + end + end + eigsolve_pullback = make_eigsolve_pullback(config, f, fᴴ, x₀, howmany, which, + alg_primal, alg_rrule, vals, vecs, info) + return (vals, vecs, info), eigsolve_pullback +end + +function make_eigsolve_pullback(config, f, fᴴ, x₀, howmany, which, alg_primal, alg_rrule, + vals, vecs, info) + function eigsolve_pullback(ΔX) + ∂self = NoTangent() + ∂x₀ = ZeroTangent() + ∂howmany = NoTangent() + ∂which = NoTangent() + ∂alg = NoTangent() + + # Prepare inputs: + #---------------- + _Δvals = unthunk(ΔX[1]) + _Δvecs = unthunk(ΔX[2]) + # special case: propagate zero tangent + if _Δvals isa AbstractZero && _Δvecs isa AbstractZero + ∂f = ZeroTangent() + return ∂self,
∂f, ∂x₀, ∂howmany, ∂which, ∂alg + end + # discard vals/vecs from n + 1 onwards if contribution is zero + _n_vals = _Δvals isa AbstractZero ? nothing : findlast(!iszero, _Δvals) + _n_vecs = _Δvecs isa AbstractZero ? nothing : + findlast(!Base.Fix2(isa, AbstractZero), _Δvecs) + n_vals = isnothing(_n_vals) ? 0 : _n_vals + n_vecs = isnothing(_n_vecs) ? 0 : _n_vecs + n = max(n_vals, n_vecs) + # special case (can this happen?): try to maintain type stability + if n == 0 + if howmany == 0 + T = (alg_primal isa Lanczos) ? scalartype(x₀) : complex(scalartype(x₀)) + _vecs = [zerovector(x₀, T)] + ws = [_vecs[1]] + ∂f = construct∂f_eig(config, f, _vecs, ws) + return ∂self, ∂f, ∂x₀, ∂howmany, ∂which, ∂alg + else + ws = [zerovector(vecs[1])] + ∂f = construct∂f_eig(config, f, vecs, ws) + return ∂self, ∂f, ∂x₀, ∂howmany, ∂which, ∂alg + end + end + Δvals = fill(zero(vals[1]), n) + if n_vals > 0 + Δvals[1:n_vals] .= view(_Δvals, 1:n_vals) + end + if _Δvecs isa AbstractZero + # case of no contribution of singular vectors + Δvecs = fill(ZeroTangent(), n) + else + Δvecs = fill(zerovector(vecs[1]), n) + if n_vecs > 0 + for i in 1:n_vecs + if !(_Δvecs[i] isa AbstractZero) + Δvecs[i] = _Δvecs[i] + end + end + end + end + + # Compute actual pullback data: + #------------------------------ + ws = compute_eigsolve_pullback_data(Δvals, Δvecs, view(vals, 1:n), view(vecs, 1:n), + info, which, fᴴ, alg_primal, alg_rrule) + + # Return pullback in correct form: + #--------------------------------- + ∂f = construct∂f_eig(config, f, vecs, ws) + return ∂self, ∂f, ∂x₀, ∂howmany, ∂which, ∂alg + end + return eigsolve_pullback +end + +function compute_eigsolve_pullback_data(Δvals, Δvecs, vals, vecs, info, which, fᴴ, + alg_primal, alg_rrule::Union{GMRES,BiCGStab}) + ws = similar(vecs, length(Δvecs)) + T = scalartype(vecs[1]) + @inbounds for i in 1:length(Δvecs) + Δλ = Δvals[i] + Δv = Δvecs[i] + λ = vals[i] + v = vecs[i] + + # First treat special cases + if isa(Δv, AbstractZero) && iszero(Δλ) # no contribution + ws[i] = zerovector(v) + continue + end + if isa(Δv, AbstractZero) && isa(alg_primal, Lanczos) # simple contribution + ws[i] = scale(v, Δλ) + continue + end + + # TODO: Is the following useful and correct? 
+ # (given that Δvecs might contain weird tangent types) + # The following only holds if `f` represents a real linear operator, which we cannot + # check explicitly, unless `f isa AbstractMatrix`. + # However, exact equality between conjugate pairs of eigenvalues and eigenvectors + # seems sufficient to guarantee this + # Also, we can only be sure to know how to apply complex conjugation when the + # vectors are of type `AbstractArray{T}` with `T` the scalar type + # if i > 1 && ws[i - 1] isa AbstractArray{T} && + # vals[i] == conj(vals[i - 1]) && Δvals[i] == conj(Δvals[i - 1]) && + # vecs[i] == conj(vecs[i - 1]) && Δvecs[i] == conj(Δvecs[i - 1]) + # ws[i] = conj(ws[i - 1]) + # continue + # end + + if isa(Δv, AbstractZero) + b = (zerovector(v), convert(T, Δλ)) + else + vdΔv = inner(v, Δv) + if alg_rrule.verbosity >= WARN_LEVEL + gauge = abs(imag(vdΔv)) + gauge > alg_primal.tol && + @warn "`eigsolve` cotangent for eigenvector $i is sensitive to gauge choice: (|gauge| = $gauge)" + end + Δv = add(Δv, v, -vdΔv) + b = (Δv, convert(T, Δλ)) + end + w, reverse_info = let λ = λ, v = v + linsolve(b, zerovector(b), alg_rrule) do (x1, x2) + y1 = VectorInterface.add!!(VectorInterface.add!!(KrylovKit.apply(fᴴ, x1), + x1, conj(λ), -1), + v, x2) + y2 = inner(v, x1) + return (y1, y2) + end + end + if info.converged >= i && reverse_info.converged == 0 && + alg_primal.verbosity >= WARN_LEVEL + @warn "`eigsolve` cotangent linear problem ($i) did not converge, whereas the primal eigenvalue problem did: normres = $(reverse_info.normres)" + elseif abs(w[2]) > (alg_rrule.tol * norm(w[1])) && + alg_primal.verbosity >= WARN_LEVEL + @warn "`eigsolve` cotangent linear problem ($i) returns unexpected result: error = $(w[2])" + end + ws[i] = w[1] + end + return ws +end + +function compute_eigsolve_pullback_data(Δvals, Δvecs, vals, vecs, info, which, fᴴ, + alg_primal::Arnoldi, alg_rrule::Arnoldi) + n = length(Δvecs) + T = scalartype(vecs[1]) + G = zeros(T, n, n) + VdΔV = zeros(T, n, n) + for j in 1:n + for i in 1:n + if i < j + G[i, j] = conj(G[j, i]) + elseif i == j + G[i, i] = norm(vecs[i])^2 + else + G[i, j] = inner(vecs[i], vecs[j]) + end + if !(Δvecs[j] isa AbstractZero) + VdΔV[i, j] = inner(vecs[i], Δvecs[j]) + end + end + end + + # components along subspace spanned by current eigenvectors + tol = alg_primal.tol + if alg_rrule.verbosity >= WARN_LEVEL + mask = abs.(transpose(vals) .- vals) .< tol + gaugepart = VdΔV[mask] - Diagonal(real(diag(VdΔV)))[mask] + Δgauge = norm(gaugepart, Inf) + Δgauge > tol && + @warn "`eigsolve` cotangents sensitive to gauge choice: (|Δgauge| = $Δgauge)" + end + VdΔV′ = VdΔV - G * Diagonal(diag(VdΔV) ./ diag(G)) + aVdΔV = VdΔV′ .* conj.(safe_inv.(transpose(vals) .- vals, tol)) + for i in 1:n + aVdΔV[i, i] += Δvals[i] + end + Gc = cholesky!(G) + iGaVdΔV = Gc \ aVdΔV + iGVdΔV = Gc \ VdΔV + + zs = similar(vecs) + for i in 1:n + z = scale(vecs[1], iGaVdΔV[1, i]) + for j in 2:n + z = VectorInterface.add!!(z, vecs[j], iGaVdΔV[j, i]) + end + zs[i] = z + end + + # components in orthogonal subspace: + # solve Sylvester problem (A * (1-P) + shift * P) * W - W * Λ = ΔV as eigenvalue problem + # with ΔVᵢ = fᴴ(zᵢ) + (1 - P) * Δvᵢ + # where we can recycle information in the computation of P * Δvᵢ + sylvesterarg = similar(vecs) + for i in 1:n + y = KrylovKit.apply(fᴴ, zs[i]) + if !(Δvecs[i] isa AbstractZero) + y = VectorInterface.add!!(y, Δvecs[i]) + for j in 1:n + y = VectorInterface.add!!(y, vecs[j], -iGVdΔV[j, i]) + end + end + sylvesterarg[i] = y + end + + # To solve Sylvester problem as eigenvalue problem, we potentially need to shift the + # zero eigenvalues that originate from the projection onto the orthogonal complement of the + # original subspace, namely whenever zero is more extremal than the actual eigenvalues. + # Hereto, we shift the zero eigenvalues in the original subspace to the value 2 * vals[n], + # where we expect that if `by(vals[n]) > by(0)`, then `by(2*vals[n]) > by(vals[n])` + # (whenever `rev = false`, and with opposite inequality whenever `rev = true`) + by, rev = KrylovKit.eigsort(which) + if (rev ? (by(vals[n]) < by(zero(vals[n]))) : (by(vals[n]) > by(zero(vals[n])))) + shift = 2 * conj(vals[n]) + else + shift = zero(vals[n]) + end + # The ith column wᵢ of the solution to the Sylvester equation is contained in + # the eigenvector (wᵢ, eᵢ) corresponding to eigenvalue λᵢ of the block matrix + # [(A * (1-P) + shift * P) -ΔV; 0 Λ], where eᵢ is the ith unit vector. We will need + # to renormalise the eigenvectors to have exactly eᵢ as second component. We use + # (0, e₁ + e₂ + ... + eₙ) as the initial guess for the eigenvalue problem. + + W₀ = (zerovector(vecs[1]), one.(vals)) + P = orthogonalprojector(vecs, n, Gc) + # TODO: is `realeigsolve` ever used here, as there is a separate `alg_primal::Lanczos` method below + solver = (T <: Real) ? KrylovKit.realeigsolve : KrylovKit.eigsolve # for `eigsolve`, `T` will always be a Complex subtype + rvals, Ws, reverse_info = let P = P, ΔV = sylvesterarg, shift = shift, + eigsort = EigSorter(v -> minimum(DistanceTo(conj(v)), vals)) + + solver(W₀, n, eigsort, alg_rrule) do (w, x) + w₀ = P(w) + w′ = KrylovKit.apply(fᴴ, add(w, w₀, -1)) + if !iszero(shift) + w′ = VectorInterface.add!!(w′, w₀, shift) + end + @inbounds for i in eachindex(x) # length(x) = n but let us not use outer variables + w′ = VectorInterface.add!!(w′, ΔV[i], -x[i]) + end + return (w′, conj.(vals) .* x) + end + end + if info.converged >= n && reverse_info.converged < n && + alg_primal.verbosity >= WARN_LEVEL + @warn "`eigsolve` cotangent problem did not converge, whereas the primal eigenvalue problem did" + end + # cleanup and construct final result by renormalising the eigenvectors and explicitly + # checking that they have the expected form and reproduce the expected eigenvalue + ws = zs + tol = alg_rrule.tol + Q = orthogonalcomplementprojector(vecs, n, Gc) + for i in 1:n + d, ic = findmin(DistanceTo(conj(vals[i])), rvals) + w, x = Ws[ic] + factor = 1 / x[i] + x[i] = zero(x[i]) + if alg_primal.verbosity >= WARN_LEVEL + error = max(norm(x, Inf), abs(rvals[ic] - conj(vals[i]))) + error > 10 * tol && + @warn "`eigsolve` cotangent linear problem ($i) returns unexpected result: error = $error" + end + ws[i] = VectorInterface.add!!(zs[i], Q(w), -factor) + end + return ws +end + +struct DistanceTo{T} + x::T +end +(d::DistanceTo)(y) = norm(y - d.x) + +# several simplifications happen in the case of a Hermitian eigenvalue problem +function compute_eigsolve_pullback_data(Δvals, Δvecs, vals, vecs, info, which, fᴴ, + alg_primal::Lanczos, alg_rrule::Arnoldi) + n = length(Δvecs) + T = scalartype(vecs[1]) + VdΔV = zeros(T, n, n) + for j in 1:n + for i in 1:n + if !(Δvecs[j] isa AbstractZero) + VdΔV[i, j] = inner(vecs[i], Δvecs[j]) + end + end + end + + # components along subspace spanned by current eigenvectors + tol = alg_primal.tol + aVdΔV = rmul!(VdΔV - VdΔV', 1 / 2) + if alg_rrule.verbosity >= WARN_LEVEL + mask = abs.(transpose(vals) .- vals) .< tol + gaugepart = view(aVdΔV, mask) + gauge = norm(gaugepart, Inf) + gauge > tol && + @warn "`eigsolve` cotangents 
sensitive to gauge choice: (|gauge| = $gauge)" + end + aVdΔV .= aVdΔV .* safe_inv.(transpose(vals) .- vals, tol) + for i in 1:n + aVdΔV[i, i] += real(Δvals[i]) + end + + zs = similar(vecs) + for i in 1:n + z = scale(vecs[1], aVdΔV[1, i]) + for j in 2:n + z = VectorInterface.add!!(z, vecs[j], aVdΔV[j, i]) + end + zs[i] = z + end + + # components in orthogonal subspace + sylvesterarg = similar(vecs) + for i in 1:n + y = zerovector(vecs[1]) + if !(Δvecs[i] isa AbstractZero) + y = VectorInterface.add!!(y, Δvecs[i], +1) + for j in 1:n + y = VectorInterface.add!!(y, vecs[j], -VdΔV[j, i]) + end + end + sylvesterarg[i] = y + end + + by, rev = KrylovKit.eigsort(which) + if (rev ? (by(vals[n]) < by(zero(vals[n]))) : (by(vals[n]) > by(zero(vals[n])))) + shift = 2 * conj(vals[n]) + else + shift = zero(vals[n]) + end + W₀ = (zerovector(vecs[1]), one.(vals)) + P = orthogonalprojector(vecs, n) + solver = (T <: Real) ? KrylovKit.realeigsolve : KrylovKit.eigsolve + rvals, Ws, reverse_info = let P = P, ΔV = sylvesterarg, shift = shift, + eigsort = EigSorter(v -> minimum(DistanceTo(conj(v)), vals)) + + solver(W₀, n, eigsort, alg_rrule) do (w, x) + w₀ = P(w) + w′ = KrylovKit.apply(fᴴ, add(w, w₀, -1)) + if !iszero(shift) + w′ = VectorInterface.add!!(w′, w₀, shift) + end + @inbounds for i in 1:length(x) # length(x) = n but let us not use outer variables + w′ = VectorInterface.add!!(w′, ΔV[i], -x[i]) + end + return (w′, vals .* x) + end + end + if info.converged >= n && reverse_info.converged < n && + alg_primal.verbosity >= WARN_LEVEL + @warn "`eigsolve` cotangent problem did not converge, whereas the primal eigenvalue problem did" + end + + # cleanup and construct final result + ws = zs + tol = alg_rrule.tol + Q = orthogonalcomplementprojector(vecs, n) + for i in 1:n + w, x = Ws[i] + _, ic = findmax(abs, x) + factor = 1 / x[ic] + x[ic] = zero(x[ic]) + error = max(norm(x, Inf), abs(rvals[i] - conj(vals[ic]))) + if error > 10 * tol && alg_primal.verbosity >= WARN_LEVEL + @warn "`eigsolve` cotangent linear problem ($ic) returns unexpected result: error = $error" + end + ws[ic] = VectorInterface.add!!(zs[ic], Q(w), -factor) + end + return ws +end + +function construct∂f_eig(config, f, vecs, ws) + config isa RuleConfig{>:HasReverseMode} || + throw(ArgumentError("`eigsolve` reverse-mode AD requires AD engine that supports calling back into AD")) + + v = vecs[1] + w = ws[1] + ∂f = rrule_via_ad(config, f, v)[2](w)[1] + for i in 2:length(ws) + v = vecs[i] + w = ws[i] + ∂f = ChainRulesCore.add!!(∂f, rrule_via_ad(config, f, v)[2](w)[1]) + end + return ∂f +end +function construct∂f_eig(config, A::AbstractMatrix, vecs, ws) + if A isa StridedMatrix + return InplaceableThunk(Ā -> _buildĀ_eig!(Ā, vecs, ws), + @thunk(_buildĀ_eig!(zero(A), vecs, ws))) + else + return @thunk(ProjectTo(A)(_buildĀ_eig!(zero(A), vecs, ws))) + end +end + +function _buildĀ_eig!(Ā, vs, ws) + for i in 1:length(ws) + w = ws[i] + v = vs[i] + if !(w isa AbstractZero) + if eltype(Ā) <: Real && eltype(w) <: Complex + mul!(Ā, _realview(w), _realview(v)', +1, +1) + mul!(Ā, _imagview(w), _imagview(v)', +1, +1) + else + mul!(Ā, w, v', +1, 1) + end + end + end + return Ā +end + +function reverse_which(which) + by, rev = KrylovKit.eigsort(which) + return EigSorter(by ∘ conj, rev) +end diff --git a/ext/KrylovKitChainRulesCoreExt/linsolve.jl b/ext/KrylovKitChainRulesCoreExt/linsolve.jl new file mode 100644 index 00000000..e083bf25 --- /dev/null +++ b/ext/KrylovKitChainRulesCoreExt/linsolve.jl @@ -0,0 +1,136 @@ +function ChainRulesCore.rrule(config::RuleConfig, + 
::typeof(linsolve), + f, + b, + x₀, + alg_primal, + a₀, + a₁; alg_rrule=alg_primal) + (x, info) = linsolve(f, b, x₀, alg_primal, a₀, a₁) + fᴴ, construct∂f = lin_preprocess(config, f, x) + linsolve_pullback = make_linsolve_pullback(fᴴ, b, a₀, a₁, alg_rrule, construct∂f, x, + info) + return (x, info), linsolve_pullback +end + +function make_linsolve_pullback(fᴴ, b, a₀, a₁, alg_rrule, construct∂f, x, info) + function linsolve_pullback(X̄) + x̄ = unthunk(X̄[1]) + @assert X̄[2] isa AbstractZero "No cotangent of the `info` output is supported." + ∂self = NoTangent() + ∂x₀ = ZeroTangent() + ∂algorithm = NoTangent() + if x̄ isa AbstractZero + ∂f = ZeroTangent() + ∂b = ZeroTangent() + ∂a₀ = ZeroTangent() + ∂a₁ = ZeroTangent() + return ∂self, ∂f, ∂b, ∂x₀, ∂algorithm, ∂a₀, ∂a₁ + end + + x̄₀ = zerovector(x̄, + VectorInterface.promote_scale(scalartype(x̄), + VectorInterface.promote_scale(a₀, + a₁))) + ∂b, reverse_info = linsolve(fᴴ, x̄, x̄₀, alg_rrule, conj(a₀), + conj(a₁)) + if info.converged > 0 && reverse_info.converged == 0 && + alg_rrule.verbosity >= WARN_LEVEL + @warn "`linsolve` cotangent problem did not converge, whereas the primal linear problem did: normres = $(reverse_info.normres)" + end + x∂b = inner(x, ∂b) + b∂b = inner(b, ∂b) + ∂f = construct∂f(scale(∂b, -conj(a₁))) + ∂a₀ = -x∂b + ∂a₁ = (x∂b * conj(a₀) - b∂b) / conj(a₁) + + return ∂self, ∂f, ∂b, ∂x₀, ∂algorithm, ∂a₀, ∂a₁ + end +end + +function lin_preprocess(config, f, x) + config isa RuleConfig{>:HasReverseMode} || + throw(ArgumentError("`linsolve` reverse-mode AD requires AD engine that supports calling back into AD")) + fᴴ, construct∂f_lin = let pb = rrule_via_ad(config, f, x)[2] + (v -> unthunk(pb(v)[2]), w -> pb(w)[1]) + end + return fᴴ, construct∂f_lin +end +function lin_preprocess(config, A::AbstractMatrix, x) + fᴴ = adjoint(A) + if A isa StridedMatrix + construct∂f_lin = w -> InplaceableThunk(Ā -> _buildĀ_lin!(Ā, x, w), + @thunk(_buildĀ_lin!(zero(A), x, w))) + else + construct∂f_lin = let project_A = ProjectTo(A) + w -> @thunk(project_A(_buildĀ_lin!(zero(A), x, w))) + end + end + return fᴴ, construct∂f_lin +end +function _buildĀ_lin!(Ā, v, w) + if !(w isa AbstractZero) + if eltype(Ā) <: Real && eltype(w) <: Complex + mul!(Ā, _realview(w), _realview(v)', +1, +1) + mul!(Ā, _imagview(w), _imagview(v)', +1, +1) + else + mul!(Ā, w, v', +1, 1) + end + end + return Ā +end + +# frule - currently untested - commented out while untested and unused + +# function ChainRulesCore.frule((_, ΔA, Δb, Δx₀, _, Δa₀, Δa₁)::Tuple, ::typeof(linsolve), +# A::AbstractMatrix, b::AbstractVector, x₀, algorithm, a₀, a₁) +# (x, info) = linsolve(A, b, x₀, algorithm, a₀, a₁) + +# if Δb isa ChainRulesCore.AbstractZero +# rhs = zerovector(b) +# else +# rhs = scale(Δb, (1 - Δa₁)) +# end +# if !iszero(Δa₀) +# rhs = add!!(rhs, x, -Δa₀) +# end +# if !iszero(ΔA) +# rhs = mul!(rhs, ΔA, x, -a₁, true) +# end +# (Δx, forward_info) = linsolve(A, rhs, zerovector(rhs), algorithm, a₀, a₁) +# if info.converged > 0 && forward_info.converged == 0 && alg_rrule.verbosity >= 0 +# @warn "The tangent linear problem did not converge, whereas the primal linear problem did."
+# end +# return (x, info), (Δx, NoTangent()) +# end + +# function ChainRulesCore.frule(config::RuleConfig{>:HasForwardsMode}, tangents, +# ::typeof(linsolve), +# A::AbstractMatrix, b::AbstractVector, x₀, algorithm, a₀, a₁) +# return frule(tangents, linsolve, A, b, x₀, algorithm, a₀, a₁) +# end + +# function ChainRulesCore.frule(config::RuleConfig{>:HasForwardsMode}, +# (_, Δf, Δb, Δx₀, _, Δa₀, Δa₁), +# ::typeof(linsolve), +# f, b, x₀, algorithm, a₀, a₁) +# (x, info) = linsolve(f, b, x₀, algorithm, a₀, a₁) + +# if Δb isa AbstractZero +# rhs = zerovector(b) +# else +# rhs = scale(Δb, (1 - Δa₁)) +# end +# if !iszero(Δa₀) +# rhs = add!!(rhs, x, -Δa₀) +# end +# if !(Δf isa AbstractZero) +# rhs = add!!(rhs, frule_via_ad(config, (Δf, ZeroTangent()), f, x), -a₀) +# end +# (Δx, forward_info) = linsolve(f, rhs, zerovector(rhs), algorithm, a₀, a₁) +# if info.converged > 0 && forward_info.converged == 0 && alg_rrule.verbosity >= 0 +# @warn "The tangent linear problem did not converge, whereas the primal linear problem did." +# end +# return (x, info), (Δx, NoTangent()) +# end diff --git a/ext/KrylovKitChainRulesCoreExt/svdsolve.jl b/ext/KrylovKitChainRulesCoreExt/svdsolve.jl new file mode 100644 index 00000000..200575f4 --- /dev/null +++ b/ext/KrylovKitChainRulesCoreExt/svdsolve.jl @@ -0,0 +1,304 @@ +# Reverse rule adopted from tsvd! rrule as found in TensorKit.jl +function ChainRulesCore.rrule(config::RuleConfig, ::typeof(svdsolve), f, x₀, howmany, which, + alg_primal::GKL; + alg_rrule=Arnoldi(; tol=alg_primal.tol, + krylovdim=alg_primal.krylovdim, + maxiter=alg_primal.maxiter, + eager=alg_primal.eager, + orth=alg_primal.orth, + verbosity=alg_primal.verbosity)) + vals, lvecs, rvecs, info = svdsolve(f, x₀, howmany, which, alg_primal) + svdsolve_pullback = make_svdsolve_pullback(config, f, x₀, howmany, which, alg_primal, + alg_rrule, vals, lvecs, rvecs, info) + return (vals, lvecs, rvecs, info), svdsolve_pullback +end + +function make_svdsolve_pullback(config, f, x₀, howmany, which, alg_primal, alg_rrule, vals, + lvecs, rvecs, info) + function svdsolve_pullback(ΔX) + ∂self = NoTangent() + ∂x₀ = ZeroTangent() + ∂howmany = NoTangent() + ∂which = NoTangent() + ∂alg = NoTangent() + + # Prepare inputs: + #---------------- + _Δvals = unthunk(ΔX[1]) + _Δlvecs = unthunk(ΔX[2]) + _Δrvecs = unthunk(ΔX[3]) + # special case: propagate zero tangent + if _Δvals isa AbstractZero && _Δlvecs isa AbstractZero && _Δrvecs isa AbstractZero + ∂f = ZeroTangent() + return ∂self, ∂f, ∂x₀, ∂howmany, ∂which, ∂alg + end + # discard vals/vecs from n + 1 onwards if contribution is zero + _n_vals = _Δvals isa AbstractZero ? nothing : findlast(!iszero, _Δvals) + _n_lvecs = _Δlvecs isa AbstractZero ? nothing : + findlast(!Base.Fix2(isa, AbstractZero), _Δlvecs) + _n_rvecs = _Δrvecs isa AbstractZero ? nothing : + findlast(!Base.Fix2(isa, AbstractZero), _Δrvecs) + n_vals = isnothing(_n_vals) ? 0 : _n_vals + n_lvecs = isnothing(_n_lvecs) ? 0 : _n_lvecs + n_rvecs = isnothing(_n_rvecs) ? 
0 : _n_rvecs + n = max(n_vals, n_lvecs, n_rvecs) + # special case (can this happen?): try to maintain type stability + if n == 0 + if howmany == 0 + _lvecs = [zerovector(x₀)] + _rvecs = [apply_adjoint(f, x₀)] + xs = [_lvecs[1]] + ys = [_rvecs[1]] + ∂f = construct∂f_svd(config, f, _lvecs, _rvecs, xs, ys) + return ∂self, ∂f, ∂x₀, ∂howmany, ∂which, ∂alg + else + xs = [zerovector(lvecs[1])] + ys = [zerovector(rvecs[1])] + ∂f = construct∂f_svd(config, f, lvecs, rvecs, xs, ys) + return ∂self, ∂f, ∂x₀, ∂howmany, ∂which, ∂alg + end + end + Δvals = fill(zero(vals[1]), n) + if n_vals > 0 + Δvals[1:n_vals] .= view(_Δvals, 1:n_vals) + end + if _Δlvecs isa AbstractZero && _Δrvecs isa AbstractZero + # case of no contribution of singular vectors + Δlvecs = fill(ZeroTangent(), n) + Δrvecs = fill(ZeroTangent(), n) + else + Δlvecs = fill(zerovector(lvecs[1]), n) + Δrvecs = fill(zerovector(rvecs[1]), n) + if n_lvecs > 0 + Δlvecs[1:n_lvecs] .= view(_Δlvecs, 1:n_lvecs) + end + if n_rvecs > 0 + Δrvecs[1:n_rvecs] .= view(_Δrvecs, 1:n_rvecs) + end + end + + # Compute actual pullback data: + #------------------------------ + xs, ys = compute_svdsolve_pullback_data(Δvals, Δlvecs, Δrvecs, view(vals, 1:n), + view(lvecs, 1:n), view(rvecs, 1:n), + info, f, which, alg_primal, alg_rrule) + + # Return pullback in correct form: + #--------------------------------- + ∂f = construct∂f_svd(config, f, lvecs, rvecs, xs, ys) + return ∂self, ∂f, ∂x₀, ∂howmany, ∂which, ∂alg + end + return svdsolve_pullback +end + +function compute_svdsolve_pullback_data(Δvals, Δlvecs, Δrvecs, vals, lvecs, rvecs, info, f, + which, alg_primal, alg_rrule::Union{GMRES,BiCGStab}) + xs = similar(lvecs, length(Δvals)) + ys = similar(rvecs, length(Δvals)) + for i in 1:length(vals) + Δσ = Δvals[i] + Δu = Δlvecs[i] + Δv = Δrvecs[i] + σ = vals[i] + u = lvecs[i] + v = rvecs[i] + + # First treat special cases + if isa(Δv, AbstractZero) && isa(Δu, AbstractZero) # no contribution + xs[i] = scale(u, real(Δσ) / 2) + ys[i] = scale(v, real(Δσ) / 2) + continue + end + udΔu = inner(u, Δu) + vdΔv = inner(v, Δv) + if (udΔu isa Complex) || (vdΔv isa Complex) + if alg_rrule.verbosity >= WARN_LEVEL + gauge = abs(imag(udΔu + vdΔv)) + gauge > alg_primal.tol && + @warn "`svdsolve` cotangents for singular vectors $i are sensitive to gauge choice: (|gauge| = $gauge)" + end + Δs = real(Δσ) + im * imag(udΔu - vdΔv) / (2 * σ) + else + Δs = real(Δσ) + end + b = (add(Δu, u, -udΔu), add(Δv, v, -vdΔv)) + (x, y), reverse_info = let σ = σ, u = u, v = v + linsolve(b, zerovector(b), alg_rrule) do (x, y) + x′ = VectorInterface.add!!(apply_normal(f, y), x, σ, -1) + y′ = VectorInterface.add!!(apply_adjoint(f, x), y, σ, -1) + x′ = VectorInterface.add!!(x′, u, -inner(u, x′)) + y′ = VectorInterface.add!!(y′, v, -inner(v, y′)) + return (x′, y′) + end + end + if info.converged >= i && reverse_info.converged == 0 && + alg_primal.verbosity >= WARN_LEVEL + @warn "`svdsolve` cotangent linear problem ($i) did not converge, whereas the primal singular value problem did: normres = $(reverse_info.normres)" + end + x = VectorInterface.add!!(x, u, Δs / 2) + y = VectorInterface.add!!(y, v, conj(Δs) / 2) + xs[i] = x + ys[i] = y + end + return xs, ys +end +function compute_svdsolve_pullback_data(Δvals, Δlvecs, Δrvecs, vals, lvecs, rvecs, info, f, + which, alg_primal, alg_rrule::Arnoldi) + @assert which == :LR "pullback currently only implemented for `which == :LR`" + T = scalartype(lvecs) + n = length(Δvals) + UdΔU = zeros(T, n, n) + VdΔV = zeros(T, n, n) + for j in 1:n + for i in 1:n + if !(Δlvecs[j] isa
AbstractZero) + UdΔU[i, j] = inner(lvecs[i], Δlvecs[j]) + end + if !(Δrvecs[j] isa AbstractZero) + VdΔV[i, j] = inner(rvecs[i], Δrvecs[j]) + end + end + end + aUdΔU = rmul!(UdΔU - UdΔU', 1 / 2) + aVdΔV = rmul!(VdΔV - VdΔV', 1 / 2) + + tol = alg_primal.tol + if alg_rrule.verbosity >= WARN_LEVEL + mask = abs.(vals' .- vals) .< tol + gaugepart = view(aUdΔU, mask) + view(aVdΔV, mask) + gauge = norm(gaugepart, Inf) + gauge > alg_primal.tol && + @warn "`svdsolve` cotangents for singular vectors are sensitive to gauge choice: (|gauge| = $gauge)" + end + UdΔAV = (aUdΔU .+ aVdΔV) .* safe_inv.(vals' .- vals, tol) .+ + (aUdΔU .- aVdΔV) .* safe_inv.(vals' .+ vals, tol) + if !(Δvals isa ZeroTangent) + UdΔAV[diagind(UdΔAV)] .+= real.(Δvals) + end + + xs = similar(lvecs, n) + ys = similar(rvecs, n) + for i in 1:n + x = scale(lvecs[1], UdΔAV[1, i] / 2) + y = scale(rvecs[1], conj(UdΔAV[i, 1]) / 2) + for j in 2:n + x = VectorInterface.add!!(x, lvecs[j], UdΔAV[j, i] / 2) + y = VectorInterface.add!!(y, rvecs[j], conj(UdΔAV[i, j]) / 2) + end + xs[i] = x + ys[i] = y + end + + sylvesterargx = similar(lvecs) + for i in 1:n + x = zerovector(lvecs[1]) + if !(Δlvecs[i] isa AbstractZero) + x = VectorInterface.add!!(x, Δlvecs[i], +1) + for j in 1:n + x = VectorInterface.add!!(x, lvecs[j], -UdΔU[j, i]) + end + end + sylvesterargx[i] = x + end + sylvesterargy = similar(rvecs) + for i in 1:n + y = zerovector(rvecs[1]) + if !(Δrvecs[i] isa AbstractZero) + y = VectorInterface.add!!(y, Δrvecs[i], +1) + for j in 1:n + y = VectorInterface.add!!(y, rvecs[j], -VdΔV[j, i]) + end + end + sylvesterargy[i] = y + end + + W₀ = (zerovector(lvecs[1]), zerovector(rvecs[1]), fill(one(T), n)) + QU = orthogonalcomplementprojector(lvecs, n) + QV = orthogonalcomplementprojector(rvecs, n) + solver = (T <: Real) ? 
KrylovKit.realeigsolve : KrylovKit.eigsolve + rvals, Ws, reverse_info = let QU = QU, QV = QV, ΔU = sylvesterargx, ΔV = sylvesterargy + solver(W₀, n, :LR, alg_rrule) do w + x, y, z = w + x′ = QU(apply_normal(f, y)) + y′ = QV(apply_adjoint(f, x)) + @inbounds for i in 1:length(z) + x′ = VectorInterface.add!!(x′, ΔU[i], -z[i]) + y′ = VectorInterface.add!!(y′, ΔV[i], -z[i]) + end + return (x′, y′, vals .* z) + end + end + if info.converged >= n && reverse_info.converged < n && + alg_primal.verbosity >= WARN_LEVEL + @warn "`svdsolve` cotangent problem did not converge, whereas the primal singular value problem did" + end + + # cleanup and construct final result + tol = alg_rrule.tol + for i in 1:n + x, y, z = Ws[i] + _, ic = findmax(abs, z) + if ic != i && alg_primal.verbosity >= WARN_LEVEL + @warn "`svdsolve` cotangent linear problem ($ic) returns unexpected result" + end + factor = 1 / z[ic] + z[ic] = zero(z[ic]) + error = max(norm(z, Inf), abs(rvals[i] - vals[ic])) + if error > 10 * tol && alg_primal.verbosity >= WARN_LEVEL + @warn "`svdsolve` cotangent linear problem ($ic) returns unexpected result: error = $error vs tol = $tol" + end + xs[ic] = VectorInterface.add!!(xs[ic], x, -factor) + ys[ic] = VectorInterface.add!!(ys[ic], y, -factor) + end + return xs, ys +end + +function construct∂f_svd(config, f, lvecs, rvecs, xs, ys) + config isa RuleConfig{>:HasReverseMode} || + throw(ArgumentError("`svdsolve` reverse-mode AD requires AD engine that supports calling back into AD")) + + u, v = lvecs[1], rvecs[1] + x, y = xs[1], ys[1] + ∂f = rrule_via_ad(config, f, v, Val(false))[2](x)[1] + ∂f = ChainRulesCore.add!!(∂f, rrule_via_ad(config, f, u, Val(true))[2](y)[1]) + for i in 2:length(xs) + u, v = lvecs[i], rvecs[i] + x, y = xs[i], ys[i] + ∂f = ChainRulesCore.add!!(∂f, rrule_via_ad(config, f, v, Val(false))[2](x)[1]) + ∂f = ChainRulesCore.add!!(∂f, rrule_via_ad(config, f, u, Val(true))[2](y)[1]) + end + return ∂f +end +function construct∂f_svd(config, (f, fᴴ)::Tuple{Any,Any}, lvecs, rvecs, xs, ys) + config isa RuleConfig{>:HasReverseMode} || + throw(ArgumentError("`svdsolve` reverse-mode AD requires AD engine that supports calling back into AD")) + + u, v = lvecs[1], rvecs[1] + x, y = xs[1], ys[1] + ∂f = rrule_via_ad(config, f, v)[2](x)[1] + ∂fᴴ = rrule_via_ad(config, fᴴ, u)[2](y)[1] + for i in 2:length(xs) + u, v = lvecs[i], rvecs[i] + x, y = xs[i], ys[i] + ∂f = ChainRulesCore.add!!(∂f, rrule_via_ad(config, f, v)[2](x)[1]) + ∂fᴴ = ChainRulesCore.add!!(∂fᴴ, rrule_via_ad(config, fᴴ, u)[2](y)[1]) + end + return (∂f, ∂fᴴ) +end +function construct∂f_svd(config, A::AbstractMatrix, lvecs, rvecs, xs, ys) + if A isa StridedMatrix + return InplaceableThunk(Ā -> _buildĀ_svd!(Ā, lvecs, rvecs, xs, ys), + @thunk(_buildĀ_svd!(zero(A), lvecs, rvecs, xs, ys))) + else + return @thunk(ProjectTo(A)(_buildĀ_svd!(zero(A), lvecs, rvecs, xs, ys))) + end +end +function _buildĀ_svd!(Ā, lvecs, rvecs, xs, ys) + for i in 1:length(xs) + u, v = lvecs[i], rvecs[i] + x, y = xs[i], ys[i] + mul!(Ā, x, v', +1, +1) + mul!(Ā, u, y', +1, +1) + end + return Ā +end diff --git a/ext/KrylovKitChainRulesCoreExt/utilities.jl b/ext/KrylovKitChainRulesCoreExt/utilities.jl new file mode 100644 index 00000000..704462e3 --- /dev/null +++ b/ext/KrylovKitChainRulesCoreExt/utilities.jl @@ -0,0 +1,64 @@ +safe_inv(a, tol) = abs(a) < tol ? 
zero(a) : inv(a) + +# vecs are assumed orthonormal +function orthogonalprojector(vecs, n) + function projector(w) + w′ = zerovector(w) + @inbounds for i in 1:n + w′ = VectorInterface.add!!(w′, vecs[i], inner(vecs[i], w)) + end + return w′ + end + return projector +end +function orthogonalcomplementprojector(vecs, n) + function projector(w) + w′ = scale(w, 1) + @inbounds for i in 1:n + w′ = VectorInterface.add!!(w′, vecs[i], -inner(vecs[i], w)) + end + return w′ + end + return projector +end +# vecs are not assumed orthonormal, G is the Cholesky factorisation of the overlap matrix +function orthogonalprojector(vecs, n, G::Cholesky) + overlaps = zeros(eltype(G), n) + function projector(w) + @inbounds for i in 1:n + overlaps[i] = inner(vecs[i], w) + end + overlaps = ldiv!(G, overlaps) + w′ = zerovector(w) + @inbounds for i in 1:n + w′ = VectorInterface.add!!(w′, vecs[i], +overlaps[i]) + end + return w′ + end + return projector +end +function orthogonalcomplementprojector(vecs, n, G::Cholesky) + overlaps = zeros(eltype(G), n) + function projector(w) + @inbounds for i in 1:n + overlaps[i] = inner(vecs[i], w) + end + overlaps = ldiv!(G, overlaps) + w′ = scale(w, 1) + @inbounds for i in 1:n + w′ = VectorInterface.add!!(w′, vecs[i], -overlaps[i]) + end + return w′ + end + return projector +end + +function _realview(v::AbstractVector{Complex{T}}) where {T} + v_real = reinterpret(T, v) + return view(v_real, axes(v_real, 1)[begin:2:end]) +end + +function _imagview(v::AbstractVector{Complex{T}}) where {T} + v_real = reinterpret(T, v) + return view(v_real, axes(v_real, 1)[(begin + 1):2:end]) +end diff --git a/src/KrylovKit.jl b/src/KrylovKit.jl index 54a8da34..c43ce082 100644 --- a/src/KrylovKit.jl +++ b/src/KrylovKit.jl @@ -24,18 +24,20 @@ using VectorInterface using VectorInterface: add!! using LinearAlgebra using Printf -using ChainRulesCore -using GPUArraysCore +using Random +using PackageExtensionCompat const IndexRange = AbstractRange{Int} -export linsolve, eigsolve, geneigsolve, svdsolve, schursolve, exponentiate, expintegrator +export linsolve, reallinsolve, lssolve, reallssolve +export eigsolve, geneigsolve, realeigsolve, schursolve, svdsolve +export exponentiate, expintegrator export orthogonalize, orthogonalize!!, orthonormalize, orthonormalize!! export basis, rayleighquotient, residual, normres, rayleighextension export initialize, initialize!, expand!, shrink! 
export ClassicalGramSchmidt, ClassicalGramSchmidt2, ClassicalGramSchmidtIR export ModifiedGramSchmidt, ModifiedGramSchmidt2, ModifiedGramSchmidtIR export LanczosIterator, ArnoldiIterator, GKLIterator -export CG, GMRES, BiCGStab, Lanczos, Arnoldi, GKL, GolubYe +export CG, GMRES, BiCGStab, Lanczos, Arnoldi, GKL, GolubYe, LSMR export KrylovDefaults, EigSorter export RecursiveVec, InnerProductVec @@ -60,7 +62,9 @@ enable_threads() = set_num_threads(Base.Threads.nthreads()) disable_threads() = set_num_threads(1) function __init__() - return set_num_threads(Base.Threads.nthreads()) + @require_extensions + set_num_threads(Base.Threads.nthreads()) + return nothing end struct SplitRange @@ -87,20 +91,18 @@ function Base.iterate(r::SplitRange, i=1) offset = (i - 1) * (r.innerlength + 1) * step start = r.start + offset stop = start + step * r.innerlength + return StepRange(start, step, stop), i + 1 elseif i <= r.outerlength offset = (r.outerlength1 + (i - 1) * r.innerlength) * step start = r.start + offset stop = start + step * (r.innerlength - 1) + return StepRange(start, step, stop), i + 1 else return nothing end - return StepRange(start, step, stop), i + 1 end Base.length(r::SplitRange) = r.outerlength -# Algorithm types -include("algorithms.jl") - # Structures to store a list of basis vectors """ abstract type Basis{T} end @@ -118,14 +120,6 @@ See [`OrthonormalBasis`](@ref) for a specific implementation. """ abstract type Basis{T} end -include("orthonormal.jl") - -# Dense linear algebra structures and functions used in the algorithms below -include("dense/givens.jl") -include("dense/linalg.jl") -include("dense/packedhessenberg.jl") -include("dense/reflector.jl") - # Simple coordinate basis vector, i.e. a vector of all zeros and a single one on position `k`: """ SimpleBasisVector(m, k) @@ -148,7 +142,8 @@ end # some often used tools function checkposdef(z) r = checkhermitian(z) - r > 0 || error("operator does not appear to be positive definite: diagonal element $z") + r > 0 || + error("operator does not appear to be positive definite: diagonal element $z") return r end function checkhermitian(z, n=abs(z)) @@ -160,6 +155,23 @@ end # apply operators include("apply.jl") +# Verbosity levels +const WARN_LEVEL = 1 +const STARTSTOP_LEVEL = 2 +const EACHITERATION_LEVEL = 3 + +# Algorithm types +include("algorithms.jl") + +# OrthonormalBasis, orthogonalization and orthonormalization methods +include("orthonormal.jl") + +# Dense linear algebra structures and functions used in the algorithms below +include("dense/givens.jl") +include("dense/linalg.jl") +include("dense/packedhessenberg.jl") +include("dense/reflector.jl") + # Krylov and related factorizations and their iterators include("factorizations/krylov.jl") include("factorizations/lanczos.jl") @@ -213,16 +225,35 @@ function Base.show(io::IO, info::ConvergenceInfo) " iterations and ", info.numops, " applications of the linear map;") - return println(io, "norms of residuals are given by $((info.normres...,)).") + return print(io, "norms of residuals are given by ", normres2string(info.normres), ".") end -# eigsolve en schursolve -include("eigsolve/eigsolve.jl") -include("eigsolve/lanczos.jl") -include("eigsolve/arnoldi.jl") -include("eigsolve/geneigsolve.jl") -include("eigsolve/golubye.jl") -include("eigsolve/svdsolve.jl") +# Convert residual norms into strings for info and warning printing +normres2string(β::Number) = @sprintf("%.2e", β) +function normres2string(β) + s = "(" + for i in 1:length(β) + s *= normres2string(β[i]) + i < length(β) && (s *= ", 
") + end + s *= ")" + return s +end + +# vectors with modified inner product +include("innerproductvec.jl") + +# support for real +_realinner(v, w) = real(inner(v, w)) +const RealVec{V} = InnerProductVec{typeof(_realinner),V} +RealVec(v) = InnerProductVec(v, _realinner) + +apply(A, x::RealVec) = RealVec(apply(A, x[])) + +apply_normal(f::Tuple{Any,Any}, x::RealVec) = RealVec(apply_normal(f, x[])) +apply_adjoint(f::Tuple{Any,Any}, x::RealVec) = RealVec(apply_adjoint(f, x[])) +apply_normal(f, x::RealVec) = RealVec(apply_normal(f, x[])) +apply_adjoint(f, x::RealVec) = RealVec(apply_adjoint(f, x[])) # linsolve include("linsolve/linsolve.jl") @@ -230,15 +261,22 @@ include("linsolve/cg.jl") include("linsolve/gmres.jl") include("linsolve/bicgstab.jl") +# lssolve +include("lssolve/lssolve.jl") +include("lssolve/lsmr.jl") + +# eigsolve and svdsolve +include("eigsolve/eigsolve.jl") +include("eigsolve/lanczos.jl") +include("eigsolve/arnoldi.jl") +include("eigsolve/geneigsolve.jl") +include("eigsolve/golubye.jl") +include("eigsolve/svdsolve.jl") + # exponentiate include("matrixfun/exponentiate.jl") include("matrixfun/expintegrator.jl") -# rules for automatic differentation -include("adrules/linsolve.jl") - -# custom vector types -include("recursivevec.jl") -include("innerproductvec.jl") - +# deprecations +include("deprecated.jl") end diff --git a/src/adrules/linsolve.jl b/src/adrules/linsolve.jl deleted file mode 100644 index 10284b26..00000000 --- a/src/adrules/linsolve.jl +++ /dev/null @@ -1,141 +0,0 @@ -function ChainRulesCore.rrule(::typeof(linsolve), - A::AbstractMatrix, - b::AbstractVector, - x₀, - algorithm, - a₀, - a₁) - (x, info) = linsolve(A, b, x₀, algorithm, a₀, a₁) - project_A = ProjectTo(A) - - function linsolve_pullback(X̄) - x̄ = unthunk(X̄[1]) - ∂self = NoTangent() - ∂x₀ = ZeroTangent() - ∂algorithm = NoTangent() - ∂b, reverse_info = linsolve(A', x̄, (zero(a₀) * zero(a₁)) * x̄, algorithm, conj(a₀), - conj(a₁)) - if info.converged > 0 && reverse_info.converged == 0 - @warn "The cotangent linear problem did not converge, whereas the primal linear problem did." - end - if A isa StridedMatrix - ∂A = InplaceableThunk(Ā -> mul!(Ā, ∂b, x', -conj(a₁), true), - @thunk(-conj(a₁) * ∂b * x')) - else - ∂A = @thunk(project_A(-conj(a₁) * ∂b * x')) - end - ∂a₀ = @thunk(-dot(x, ∂b)) - if a₀ == zero(a₀) && a₁ == one(a₁) - ∂a₁ = @thunk(-dot(b, ∂b)) - else - ∂a₁ = @thunk(-dot((b - a₀ * x) / a₁, ∂b)) - end - return ∂self, ∂A, ∂b, ∂x₀, ∂algorithm, ∂a₀, ∂a₁ - end - return (x, info), linsolve_pullback -end - -function ChainRulesCore.rrule(config::RuleConfig{>:HasReverseMode}, - ::typeof(linsolve), - A::AbstractMatrix, - b::AbstractVector, - x₀, - algorithm, - a₀, - a₁) - return rrule(linsolve, A, b, x₀, algorithm, a₀, a₁) -end - -function ChainRulesCore.rrule(config::RuleConfig{>:HasReverseMode}, - ::typeof(linsolve), - f, - b, - x₀, - algorithm, - a₀, - a₁) - x, info = linsolve(f, b, x₀, algorithm, a₀, a₁) - - # f defines a linear map => pullback defines action of the adjoint - (y, f_pullback) = rrule_via_ad(config, f, x) - fᴴ(xᴴ) = f_pullback(xᴴ)[2] - # TODO can we avoid computing f_pullback if algorithm isa Union{CG,MINRES}? 
- - function linsolve_pullback(X̄) - x̄ = unthunk(X̄[1]) - ∂self = NoTangent() - ∂x₀ = ZeroTangent() - ∂algorithm = NoTangent() - T = VectorInterface.promote_scale(VectorInterface.promote_scale(x̄, a₀), - scalartype(a₁)) - ∂b, reverse_info = linsolve(fᴴ, x̄, zerovector(x̄, T), algorithm, conj(a₀), - conj(a₁)) - if reverse_info.converged == 0 - @warn "Linear problem for reverse rule did not converge." reverse_info - end - ∂f = @thunk(f_pullback(scale(∂b, -conj(a₁)))[1]) - ∂a₀ = @thunk(-inner(x, ∂b)) - # ∂a₁ = @thunk(-dot(f(x), ∂b)) - if a₀ == zero(a₀) && a₁ == one(a₁) - ∂a₁ = @thunk(-inner(b, ∂b)) - else - ∂a₁ = @thunk(-inner(scale!!(add(b, x, -a₀), inv(a₁)), ∂b)) - end - return ∂self, ∂f, ∂b, ∂x₀, ∂algorithm, ∂a₀, ∂a₁ - end - return (x, info), linsolve_pullback -end - -# frule - currently untested - -function ChainRulesCore.frule((_, ΔA, Δb, Δx₀, _, Δa₀, Δa₁)::Tuple, ::typeof(linsolve), - A::AbstractMatrix, b::AbstractVector, x₀, algorithm, a₀, a₁) - (x, info) = linsolve(A, b, x₀, algorithm, a₀, a₁) - - if Δb isa ChainRulesCore.AbstractZero - rhs = zerovector(b) - else - rhs = scale(Δb, (1 - Δa₁)) - end - if !iszero(Δa₀) - rhs = add!!(rhs, x, -Δa₀) - end - if !iszero(ΔA) - rhs = mul!(rhs, ΔA, x, -a₁, true) - end - (Δx, forward_info) = linsolve(A, rhs, zerovector(rhs), algorithm, a₀, a₁) - if info.converged > 0 && forward_info.converged == 0 - @warn "The tangent linear problem did not converge, whereas the primal linear problem did." - end - return (x, info), (Δx, NoTangent()) -end - -function ChainRulesCore.frule(config::RuleConfig{>:HasForwardsMode}, tangents, - ::typeof(linsolve), - A::AbstractMatrix, b::AbstractVector, x₀, algorithm, a₀, a₁) - return frule(tangents, linsolve, A, b, x₀, algorithm, a₀, a₁) -end - -function ChainRulesCore.frule(config::RuleConfig{>:HasForwardsMode}, - (_, Δf, Δb, Δx₀, _, Δa₀, Δa₁), - ::typeof(linsolve), - f, b, x₀, algorithm, a₀, a₁) - (x, info) = linsolve(f, b, x₀, algorithm, a₀, a₁) - - if Δb isa AbstractZero - rhs = zerovector(b) - else - rhs = scale(Δb, (1 - Δa₁)) - end - if !iszero(Δa₀) - rhs = add!!(rhs, x, -Δa₀) - end - if !(Δf isa AbstractZero) - rhs = add!!(rhs, frule_via_ad(config, (Δf, ZeroTangent()), f, x), -a₀) - end - (Δx, forward_info) = linsolve(f, rhs, zerovector(rhs), algorithm, a₀, a₁) - if info.converged > 0 && forward_info.converged == 0 - @warn "The tangent linear problem did not converge, whereas the primal linear problem did." - end - return (x, info), (Δx, NoTangent()) -end diff --git a/src/algorithms.jl b/src/algorithms.jl index b771c966..91f6f214 100644 --- a/src/algorithms.jl +++ b/src/algorithms.jl @@ -84,8 +84,12 @@ abstract type KrylovAlgorithm end # General purpose; good for linear systems, eigensystems and matrix functions """ - Lanczos(; krylovdim = KrylovDefaults.krylovdim, maxiter = KrylovDefaults.maxiter, - tol = KrylovDefaults.tol, orth = KrylovDefaults.orth, eager = false, verbosity = 0) + Lanczos(; krylovdim=KrylovDefaults.krylovdim[], + maxiter=KrylovDefaults.maxiter[], + tol=KrylovDefaults.tol[], + orth=KrylovDefaults.orth, + eager=false, + verbosity=KrylovDefaults.verbosity[]) Represents the Lanczos algorithm for building the Krylov subspace; assumes the linear operator is real symmetric or complex Hermitian. Can be used in `eigsolve` and @@ -93,10 +97,11 @@ operator is real symmetric or complex Hermitian. Can be used in `eigsolve` and `krylovdim`, which will be repeated at most `maxiter` times and will stop when the norm of the residual of the Lanczos factorization is smaller than `tol`. 
The orthogonalizer `orth` will be used to orthogonalize the different Krylov vectors. Eager mode, as selected by -`eager = true`, means that the algorithm that uses this Lanczos process (e.g. `eigsolve`) +`eager=true`, means that the algorithm that uses this Lanczos process (e.g. `eigsolve`) can try to finish its computation before the total Krylov subspace of dimension `krylovdim` -is constructed. Default verbosity level `verbosity` is zero, meaning that no output will be -printed. +is constructed. The default verbosity level `verbosity` amounts to printing warnings upon +lack of convergence. + Use `Arnoldi` for non-symmetric or non-Hermitian linear operators. @@ -111,26 +116,31 @@ struct Lanczos{O<:Orthogonalizer,S<:Real} <: KrylovAlgorithm verbosity::Int end function Lanczos(; - krylovdim::Int=KrylovDefaults.krylovdim, - maxiter::Int=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, + krylovdim::Int=KrylovDefaults.krylovdim[], + maxiter::Int=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], orth::Orthogonalizer=KrylovDefaults.orth, eager::Bool=false, - verbosity::Int=0) + verbosity::Int=KrylovDefaults.verbosity[]) return Lanczos(orth, krylovdim, maxiter, tol, eager, verbosity) end """ - GKL(; krylovdim = KrylovDefaults.krylovdim, maxiter = KrylovDefaults.maxiter, - tol = KrylovDefaults.tol, orth = KrylovDefaults.orth, verbosity = 0) + GKL(; krylovdim=KrylovDefaults.krylovdim[], + maxiter=KrylovDefaults.maxiter[], + tol=KrylovDefaults.tol[], + orth=KrylovDefaults.orth, + eager=false, + verbosity=KrylovDefaults.verbosity[]) Represents the Golub-Kahan-Lanczos bidiagonalization algorithm for sequentially building a Krylov-like factorization of a general matrix or linear operator with a bidiagonal reduced matrix. Can be used in `svdsolve`. The corresponding algorithm builds a Krylov subspace of size at most `krylovdim`, which will be repeated at most `maxiter` times and will stop when the norm of the residual of the Arnoldi factorization is smaller than `tol`. The -orthogonalizer `orth` will be used to orthogonalize the different Krylov vectors. Default -verbosity level `verbosity` is zero, meaning that no output will be printed. +orthogonalizer `orth` will be used to orthogonalize the different Krylov vectors. The default +verbosity level `verbosity` amounts to printing warnings upon lack of convergence. + See also: [`svdsolve`](@ref), [`Orthogonalizer`](@ref) """ @@ -143,28 +153,33 @@ struct GKL{O<:Orthogonalizer,S<:Real} <: KrylovAlgorithm verbosity::Int end function GKL(; - krylovdim::Int=KrylovDefaults.krylovdim, - maxiter::Int=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, + krylovdim::Int=KrylovDefaults.krylovdim[], + maxiter::Int=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], orth::Orthogonalizer=KrylovDefaults.orth, eager::Bool=false, - verbosity::Int=0) + verbosity::Int=KrylovDefaults.verbosity[]) return GKL(orth, krylovdim, maxiter, tol, eager, verbosity) end """ - Arnoldi(; krylovdim = KrylovDefaults.krylovdim, maxiter = KrylovDefaults.maxiter, - tol = KrylovDefaults.tol, orth = KrylovDefaults.orth, eager = false, verbosity = 0) + Arnoldi(; krylovdim=KrylovDefaults.krylovdim[], + maxiter=KrylovDefaults.maxiter[], + tol=KrylovDefaults.tol[], + orth=KrylovDefaults.orth, + eager=false, + verbosity=KrylovDefaults.verbosity[]) Represents the Arnoldi algorithm for building the Krylov subspace for a general matrix or linear operator. Can be used in `eigsolve` and `exponentiate`. 
The corresponding algorithms will build a Krylov subspace of size at most `krylovdim`, which will be repeated at most `maxiter` times and will stop when the norm of the residual of the Arnoldi factorization is smaller than `tol`. The orthogonalizer `orth` will be used to orthogonalize the different -Krylov vectors. Eager mode, as selected by `eager = true`, means that the algorithm that +Krylov vectors. Eager mode, as selected by `eager=true`, means that the algorithm that uses this Arnoldi process (e.g. `eigsolve`) can try to finish its computation before the -total Krylov subspace of dimension `krylovdim` is constructed. Default verbosity level -`verbosity` is zero, meaning that no output will be printed. +total Krylov subspace of dimension `krylovdim` is constructed. The default verbosity level +`verbosity` amounts to printing warnings upon lack of convergence. + Use `Lanczos` for real symmetric or complex Hermitian linear operators. @@ -180,18 +195,22 @@ struct Arnoldi{O<:Orthogonalizer,S<:Real} <: KrylovAlgorithm verbosity::Int end function Arnoldi(; - krylovdim::Int=KrylovDefaults.krylovdim, - maxiter::Int=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, + krylovdim::Int=KrylovDefaults.krylovdim[], + maxiter::Int=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], orth::Orthogonalizer=KrylovDefaults.orth, eager::Bool=false, - verbosity::Int=0) + verbosity::Int=KrylovDefaults.verbosity[]) return Arnoldi(orth, krylovdim, maxiter, tol, eager, verbosity) end """ - GolubYe(; krylovdim = KrylovDefaults.krylovdim, maxiter = KrylovDefaults.maxiter, - tol = KrylovDefaults.tol, orth = KrylovDefaults.orth, verbosity = 0) + GolubYe(; krylovdim=KrylovDefaults.krylovdim[], + maxiter=KrylovDefaults.maxiter[], + tol=KrylovDefaults.tol[], + orth=KrylovDefaults.orth, + eager=false, + verbosity=KrylovDefaults.verbosity[]) Represents the Golub-Ye algorithm for solving hermitian (symmetric) generalized eigenvalue problems `A x = λ B x` with positive definite `B`, without the need for inverting `B`. @@ -199,7 +218,7 @@ Builds a Krylov subspace of size `krylovdim` starting from an estimate `x` by ac `(A - ρ(x) B)`, where `ρ(x) = dot(x, A*x)/dot(x, B*x)`, and employing the Lanczos algorithm. This process is repeated at most `maxiter` times. In every iteration `k>1`, the subspace will also be expanded to size `krylovdim+1` by adding ``x_k - x_{k-1}``, which is -known as the LOPCG correction and was suggested by Money and Ye. With `krylovdim = 2`, this +known as the LOPCG correction and was suggested by Money and Ye. With `krylovdim=2`, this algorithm becomes equivalent to `LOPCG`. 
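To make the expert-mode calling convention concrete, here is a minimal usage sketch (not part of the patch; the random generalized eigenproblem below is purely illustrative):

```julia
using KrylovKit, LinearAlgebra

# hermitian generalized problem A x = λ B x with positive definite B
n = 100
A = Matrix(Symmetric(randn(n, n)))
C = randn(n, n)
B = C' * C + I               # positive definite by construction
x₀ = randn(n)

alg = GolubYe(; krylovdim=10, maxiter=100, tol=1e-10)
vals, vecs, info = geneigsolve((A, B), x₀, 1, :SR, alg)
```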
""" struct GolubYe{O<:Orthogonalizer,S<:Real} <: KrylovAlgorithm @@ -210,11 +229,11 @@ struct GolubYe{O<:Orthogonalizer,S<:Real} <: KrylovAlgorithm verbosity::Int end function GolubYe(; - krylovdim::Int=KrylovDefaults.krylovdim, - maxiter::Int=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, + krylovdim::Int=KrylovDefaults.krylovdim[], + maxiter::Int=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], orth::Orthogonalizer=KrylovDefaults.orth, - verbosity::Int=0) + verbosity::Int=KrylovDefaults.verbosity[]) return GolubYe(orth, krylovdim, maxiter, tol, verbosity) end @@ -222,16 +241,17 @@ end abstract type LinearSolver <: KrylovAlgorithm end """ - CG(; maxiter = KrylovDefaults.maxiter, tol = KrylovDefaults.tol) + CG(; maxiter=KrylovDefaults.maxiter[], tol=KrylovDefaults.tol[], verbosity=KrylovDefaults.verbosity[]) Construct an instance of the conjugate gradient algorithm with specified parameters, which can be passed to `linsolve` in order to iteratively solve a linear system with a positive definite (and thus symmetric or hermitian) coefficient matrix or operator. The `CG` method will search for the optimal `x` in a Krylov subspace of maximal size `maxiter`, or stop when -`norm(A*x - b) < tol`. Default verbosity level `verbosity` is zero, meaning that no output -will be printed. +`norm(A*x - b) < tol`. The default verbosity level `verbosity` amounts to printing warnings +upon lack of convergence. -See also: [`linsolve`](@ref), [`MINRES`](@ref), [`GMRES`](@ref), [`BiCG`](@ref), + +See also: [`linsolve`](@ref), [`MINRES`](@ref), [`GMRES`](@ref), [`BiCG`](@ref), [`LSMR`](@ref), [`BiCGStab`](@ref) """ struct CG{S<:Real} <: LinearSolver @@ -240,29 +260,33 @@ struct CG{S<:Real} <: LinearSolver verbosity::Int end function CG(; - maxiter::Integer=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, - verbosity::Int=0) + maxiter::Integer=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], + verbosity::Int=KrylovDefaults.verbosity[]) return CG(maxiter, tol, verbosity) end """ - GMRES(; krylovdim = KrylovDefaults.krylovdim, maxiter = KrylovDefaults.maxiter, - tol = KrylovDefaults.tol, orth::Orthogonalizer = KrylovDefaults.orth) + GMRES(; krylovdim=KrylovDefaults.krylovdim[], + maxiter=KrylovDefaults.maxiter[], + tol=KrylovDefaults.tol[], + orth::Orthogonalizer=KrylovDefaults.orth, + verbosity=KrylovDefaults.verbosity[]) Construct an instance of the GMRES algorithm with specified parameters, which can be passed to `linsolve` in order to iteratively solve a linear system. The `GMRES` method will search for the optimal `x` in a Krylov subspace of maximal size `krylovdim`, and repeat this process for at most `maxiter` times, or stop when `norm(A*x - b) < tol`. In building the -Krylov subspace, `GMRES` will use the orthogonalizer `orth`. Default verbosity level -`verbosity` is zero, meaning that no output will be printed. +Krylov subspace, `GMRES` will use the orthogonalizer `orth`. The default verbosity level +`verbosity` amounts to printing warnings upon lack of convergence. + Note that in the traditional nomenclature of `GMRES`, the parameter `krylovdim` is referred to as the restart parameter, and `maxiter` is the number of outer iterations, i.e. restart cycles. The total iteration count, i.e. the number of expansion steps, is roughly `krylovdim` times the number of iterations. 
-See also: [`linsolve`](@ref), [`BiCG`](@ref), [`BiCGStab`](@ref), [`CG`](@ref), +See also: [`linsolve`](@ref), [`BiCG`](@ref), [`BiCGStab`](@ref), [`CG`](@ref), [`LSMR`](@ref), [`MINRES`](@ref) """ struct GMRES{O<:Orthogonalizer,S<:Real} <: LinearSolver @@ -273,17 +297,17 @@ struct GMRES{O<:Orthogonalizer,S<:Real} <: LinearSolver verbosity::Int end function GMRES(; - krylovdim::Integer=KrylovDefaults.krylovdim, - maxiter::Integer=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, + krylovdim::Integer=KrylovDefaults.krylovdim[], + maxiter::Integer=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], orth::Orthogonalizer=KrylovDefaults.orth, - verbosity::Int=0) + verbosity::Int=KrylovDefaults.verbosity[]) return GMRES(orth, maxiter, krylovdim, tol, verbosity) end # TODO """ - MINRES(; maxiter = KrylovDefaults.maxiter, tol = KrylovDefaults.tol) + MINRES(; maxiter=KrylovDefaults.maxiter[], tol=KrylovDefaults.tol[], verbosity=KrylovDefaults.verbosity[]) !!! warning "Not implemented yet" @@ -292,10 +316,11 @@ end real symmetric or complex hermitian coefficient matrix or operator. The `MINRES` method will search for the optimal `x` in a Krylov subspace of maximal size `maxiter`, or stop when `norm(A*x - b) < tol`. In building the Krylov subspace, `MINRES` will use the - orthogonalizer `orth`. Default verbosity level `verbosity` is zero, meaning that no - output will be printed. + orthogonalizer `orth`. The default verbosity level `verbosity` amounts to printing + warnings upon lack of convergence. -See also: [`linsolve`](@ref), [`CG`](@ref), [`GMRES`](@ref), [`BiCG`](@ref), + +See also: [`linsolve`](@ref), [`CG`](@ref), [`GMRES`](@ref), [`BiCG`](@ref), [`LSMR`](@ref), [`BiCGStab`](@ref) """ struct MINRES{S<:Real} <: LinearSolver @@ -304,14 +329,14 @@ struct MINRES{S<:Real} <: LinearSolver verbosity::Int end function MINRES(; - maxiter::Integer=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, - verbosity::Int=0) + maxiter::Integer=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], + verbosity::Int=KrylovDefaults.verbosity[]) return MINRES(maxiter, tol, verbosity) end """ - BiCG(; maxiter = KrylovDefaults.maxiter, tol = KrylovDefaults.tol) + BiCG(; maxiter=KrylovDefaults.maxiter[], tol=KrylovDefaults.tol[], verbosity=KrylovDefaults.verbosity[]) !!! warning "Not implemented yet" @@ -319,10 +344,11 @@ end which can be passed to `linsolve` in order to iteratively solve a linear system with a general linear map, of which the adjoint can also be applied. The `BiCG` method will search for the optimal `x` in a Krylov subspace of maximal size `maxiter`, or stop when `norm(A*x - - b) < tol`. Default verbosity level `verbosity` is zero, meaning that no output will be - printed. + b) < tol`. The default verbosity level `verbosity` amounts to printing warnings upon + lack of convergence.
+ + -See also: [`linsolve`](@ref), [`GMRES`](@ref), [`CG`](@ref), [`BiCGStab`](@ref), +See also: [`linsolve`](@ref), [`GMRES`](@ref), [`CG`](@ref), [`BiCGStab`](@ref), [`LSMR`](@ref), [`MINRES`](@ref) """ struct BiCG{S<:Real} <: LinearSolver @@ -331,22 +357,22 @@ struct BiCG{S<:Real} <: LinearSolver verbosity::Int end function BiCG(; - maxiter::Integer=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, - verbosity::Int=0) + maxiter::Integer=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], + verbosity::Int=KrylovDefaults.verbosity[]) return BiCG(maxiter, tol, verbosity) end """ - BiCGStab(; maxiter = KrylovDefaults.maxiter, tol = KrylovDefaults.tol) + BiCGStab(; maxiter=KrylovDefaults.maxiter[], tol=KrylovDefaults.tol[], verbosity=KrylovDefaults.verbosity[]) Construct an instance of the stabilized biconjugate gradient (BiCGStab) algorithm with specified parameters, which can be passed to `linsolve` in order to iteratively solve a linear system with a general linear map. The `BiCGStab` method will search for the optimal `x` in a Krylov subspace - of maximal size `maxiter`, or stop when `norm(A*x - b) < tol`. Default verbosity level - `verbosity` is zero, meaning that no output will be printed. + of maximal size `maxiter`, or stop when `norm(A*x - b) < tol`. The default verbosity level + `verbosity` amounts to printing warnings upon lack of convergence. -See also: [`linsolve`](@ref), [`GMRES`](@ref), [`CG`](@ref), [`BiCG`](@ref), +See also: [`linsolve`](@ref), [`GMRES`](@ref), [`CG`](@ref), [`BiCG`](@ref), [`LSMR`](@ref), [`MINRES`](@ref) """ struct BiCGStab{S<:Real} <: LinearSolver @@ -355,12 +381,52 @@ struct BiCGStab{S<:Real} <: LinearSolver verbosity::Int end function BiCGStab(; - maxiter::Integer=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, - verbosity::Int=0) + maxiter::Integer=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], + verbosity::Int=KrylovDefaults.verbosity[]) return BiCGStab(maxiter, tol, verbosity) end +# Solving least squares problems +abstract type LeastSquaresSolver <: KrylovAlgorithm end +""" + LSMR(; krylovdim=1, + maxiter=KrylovDefaults.maxiter[], + tol=KrylovDefaults.tol[], + orth::Orthogonalizer=ModifiedGramSchmidt(), + verbosity=KrylovDefaults.verbosity[]) + +Represents the LSMR algorithm, which minimizes ``\\|Ax - b\\|^2 + \\|λx\\|^2`` in the Euclidean norm. +If multiple solutions exist, the minimum-norm solution is returned. +The method is based on the Golub-Kahan bidiagonalization process. It is +algebraically equivalent to applying MINRES to the normal equations +``(A^*A + λ^2I)x = A^*b``, but has better numerical properties, +especially if ``A`` is ill-conditioned. + +The `LSMR` method will search for the optimal ``x`` in a Krylov subspace of maximal size +`maxiter`, or stop when ``norm(A'*(A*x - b) + λ^2 * x) < tol``. The parameter `krylovdim` +does not, in this case, indicate that a subspace of that size will be built; rather, it is the +number of most recent vectors that are kept, against which the next vector is reorthogonalized. +The default verbosity level `verbosity` amounts to printing warnings upon lack of convergence.
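As a usage sketch (the overdetermined system below is hypothetical, and the expert-mode call `lssolve(A, b, alg)` is assumed to mirror the calling convention of the other solvers):

```julia
using KrylovKit

# overdetermined least squares problem: minimize ‖A*x - b‖
A = randn(100, 40)
b = randn(100)

alg = LSMR(; krylovdim=10, maxiter=1000, tol=1e-12)
x, info = lssolve(A, b, alg)
info.converged == 1 || @warn "lssolve did not converge"
```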
+ +See also: [`lssolve`](@ref) +""" +struct LSMR{O<:Orthogonalizer,S<:Real} <: LeastSquaresSolver + orth::O + maxiter::Int + krylovdim::Int + tol::S + verbosity::Int +end +function LSMR(; + krylovdim::Integer=KrylovDefaults.krylovdim[], + maxiter::Integer=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], + orth::Orthogonalizer=ModifiedGramSchmidt(), + verbosity::Int=KrylovDefaults.verbosity[]) + return LSMR(orth, maxiter, krylovdim, tol, verbosity) +end + # Solving eigenvalue systems specifically abstract type EigenSolver <: KrylovAlgorithm end @@ -370,9 +436,10 @@ struct JacobiDavidson <: EigenSolver end """ module KrylovDefaults const orth = KrylovKit.ModifiedGramSchmidtIR() - const krylovdim = 30 - const maxiter = 100 - const tol = 1e-12 + const krylovdim = Ref(30) + const maxiter = Ref(100) + const tol = Ref(1e-12) + const verbosity = Ref(KrylovKit.WARN_LEVEL) end A module listing the default values for the typical parameters in Krylov based algorithms: @@ -394,7 +461,8 @@ A module listing the default values for the typical parameters in Krylov based a module KrylovDefaults using ..KrylovKit const orth = KrylovKit.ModifiedGramSchmidt2() # conservative choice -const krylovdim = 30 -const maxiter = 100 -const tol = 1e-12 +const krylovdim = Ref(30) +const maxiter = Ref(100) +const tol = Ref(1e-12) +const verbosity = Ref(KrylovKit.WARN_LEVEL) end diff --git a/src/dense/linalg.jl b/src/dense/linalg.jl index 56c58fea..1e80906a 100644 --- a/src/dense/linalg.jl +++ b/src/dense/linalg.jl @@ -112,7 +112,11 @@ function ldiv!(A::UpperTriangular, y::AbstractVector, r::UnitRange{Int}=1:length end # Eigenvalue decomposition of SymTridiagonal matrix -function tridiageigh!(A::SymTridiagonal{T}, Z::StridedMatrix{T}=one(A)) where {T<:BlasFloat} +function tridiageigh!(A::SymTridiagonal{T}) where {T<:BlasFloat} + Z = copyto!(similar(A.ev, size(A)), LinearAlgebra.I) + return tridiageigh!(A, Z) +end +function tridiageigh!(A::SymTridiagonal{T}, Z::StridedMatrix{T}) where {T<:BlasFloat} return stegr!(A.dv, A.ev, Z) end # redefined @@ -244,6 +248,17 @@ function schur2eigvecs(T::StridedMatrix{<:BlasReal}) end return _normalizevecs!(VR) end +function schur2realeigvecs(T::StridedMatrix{<:BlasReal}) + n = checksquare(T) + for i in 1:(n - 1) + iszero(T[i + 1, i]) || throw(ArgumentError("T must be upper triangular")) + end + VR = similar(T, n, n) + VL = similar(T, n, 0) + select = Vector{BlasInt}(undef, 0) + trevc!('R', 'A', select, T, VL, VR) + return _normalizevecs!(VR) +end function schur2eigvecs(T::AbstractMatrix{<:BlasReal}, which::AbstractVector{Int}) n = checksquare(T) which2 = unique(which) diff --git a/src/deprecated.jl b/src/deprecated.jl new file mode 100644 index 00000000..951ce39f --- /dev/null +++ b/src/deprecated.jl @@ -0,0 +1,6 @@ +Base.@deprecate(RecursiveVec(args...), tuple(args...)) + +Base.@deprecate(basis(F::GKLFactorization, which::Symbol), basis(F, Val(which))) + +import LinearAlgebra: mul! +Base.@deprecate(mul!(y, b::OrthonormalBasis, x::AbstractVector), unproject!!(y, b, x)) diff --git a/src/eigsolve/arnoldi.jl b/src/eigsolve/arnoldi.jl index b5ef8719..e8707fc2 100644 --- a/src/eigsolve/arnoldi.jl +++ b/src/eigsolve/arnoldi.jl @@ -36,16 +36,29 @@ should be targeted. Valid specifications of `which` are only be successful if you somehow know that eigenvalues close to zero are also close to the periphery of the spectrum. -The final argument `algorithm` can currently only be an instance of [`Arnoldi`](@ref), but -should nevertheless be specified. 
Since `schursolve` is less commonly used as `eigsolve`, no -convenient keyword syntax is currently available. +!!! warning "Degenerate eigenvalues" + + From a theoretical point of view, Krylov methods can at most find a single eigenvector + associated with a targeted eigenvalue, even if the latter is degenerate. In the case of + a degenerate eigenvalue, the specific eigenvector that is returned is determined by the + starting vector `x₀`. For large problems, this turns out to be less of an issue in + practice, as often a second linearly independent eigenvector is generated out of the + numerical noise resulting from the orthogonalisation steps in the Lanczos or Arnoldi + iteration. Nonetheless, it is important to take this into account and to try not to + depend on this potentially fragile behaviour, especially for smaller problems. + +The `algorithm` argument currently only supports an instance of [`Arnoldi`](@ref), which +is where the parameters of the Krylov method (such as Krylov dimension and maximum number +of iterations) can be specified. Since `schursolve` is less commonly used than `eigsolve`, +it only supports this expert mode call syntax and no convenient keyword interface is +currently available. ### Return values: The return value is always of the form `T, vecs, vals, info = schursolve(...)` with - `T`: a `Matrix` containing the partial Schur decomposition of the linear map, i.e. its - elements are given by `T[i,j] = dot(vecs[i], f(vecs[j]))`. It is of Schur form, i.e. + elements are given by `T[i,j] = inner(vecs[i], f(vecs[j]))`. It is of Schur form, i.e. upper triangular in case of complex arithmetic, and block upper triangular (with at most 2x2 blocks) in case of real arithmetic. - `vecs`: a `Vector` of corresponding Schur vectors, of the same length as `vals`. Note @@ -54,7 +67,7 @@ The return value is always of the form `T, vecs, vals, info = schursolve(...)` w objects that are typically similar to the starting guess `x₀`, up to a possibly different `eltype`. When the linear map is a simple `AbstractMatrix`, `vecs` will be `Vector{Vector{<:Number}}`. Schur vectors are by definition orthogonal, i.e. - `dot(vecs[i],vecs[j]) = I[i,j]`. Note that Schur vectors are real if the problem (i.e. + `inner(vecs[i],vecs[j]) = I[i,j]`. Note that Schur vectors are real if the problem (i.e. the linear map and the initial guess) are real. - `vals`: a `Vector` of eigenvalues, i.e. the diagonal elements of `T` in case of complex arithmetic, or extracted from the diagonal blocks in case of real arithmetic. Note that @@ -94,32 +107,36 @@ restarts where a part of the current Krylov subspace is kept.
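For reference, a minimal sketch of the expert-mode call described above (the random non-symmetric test matrix is illustrative only):

```julia
using KrylovKit

A = randn(100, 100)    # non-symmetric linear map
x₀ = randn(100)

# algorithm parameters go into the Arnoldi instance
alg = Arnoldi(; krylovdim=30, maxiter=100, tol=1e-12)
T, vecs, vals, info = schursolve(A, x₀, 3, :LM, alg)

# T is (block) upper triangular; vecs are orthonormal Schur vectors
info.converged >= 3 || @warn "schursolve did not converge"
```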
""" function schursolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi) T, U, fact, converged, numiter, numops = _schursolve(A, x₀, howmany, which, alg) + howmany′ = howmany if eltype(T) <: Real && howmany < length(fact) && T[howmany + 1, howmany] != 0 - howmany += 1 + howmany′ += 1 + elseif size(T, 1) < howmany + howmany′ = size(T, 1) end if converged > howmany - howmany = converged + howmany′ = converged end - TT = view(T, 1:howmany, 1:howmany) + TT = view(T, 1:howmany′, 1:howmany′) values = schur2eigvals(TT) + vectors = let B = basis(fact) - [B * u for u in cols(U, 1:howmany)] + [B * u for u in cols(U, 1:howmany′)] end residuals = let r = residual(fact) - [scale(r, last(u)) for u in cols(U, 1:howmany)] + [scale(r, last(u)) for u in cols(U, 1:howmany′)] end - normresiduals = [normres(fact) * abs(last(u)) for u in cols(U, 1:howmany)] - - if alg.verbosity > 0 - if converged < howmany - @warn """Arnoldi schursolve finished without convergence after $numiter iterations: - * $converged eigenvalues converged - * norm of residuals = $((normresiduals...,))""" - else - @info """Arnoldi schursolve finished after $numiter iterations: - * $converged eigenvalues converged - * norm of residuals = $((normresiduals...,))""" - end + normresiduals = [normres(fact) * abs(last(u)) for u in cols(U, 1:howmany′)] + + if (converged < howmany) && alg.verbosity >= WARN_LEVEL + @warn """Arnoldi schursolve stopped without convergence after $numiter iterations: + * $converged eigenvalues converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" + elseif alg.verbosity >= STARTSTOP_LEVEL + @info """Arnoldi schursolve finished after $numiter iterations: + * $converged eigenvalues converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" end return TT, vectors, @@ -127,20 +144,22 @@ function schursolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi) ConvergenceInfo(converged, residuals, normresiduals, numiter, numops) end -function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi) +function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi; alg_rrule=alg) T, U, fact, converged, numiter, numops = _schursolve(A, x₀, howmany, which, alg) + howmany′ = howmany if eltype(T) <: Real && howmany < length(fact) && T[howmany + 1, howmany] != 0 - howmany += 1 + howmany′ += 1 + elseif size(T, 1) < howmany + howmany′ = size(T, 1) end if converged > howmany - howmany = converged + howmany′ = converged end - d = min(howmany, size(T, 2)) - TT = view(T, 1:d, 1:d) + TT = view(T, 1:howmany′, 1:howmany′) values = schur2eigvals(TT) # Compute eigenvectors - V = view(U, :, 1:d) * schur2eigvecs(TT) + V = view(U, :, 1:howmany′) * schur2eigvecs(TT) vectors = let B = basis(fact) [B * v for v in cols(V)] end @@ -149,19 +168,165 @@ function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi) end normresiduals = [normres(fact) * abs(last(v)) for v in cols(V)] - if alg.verbosity > 0 - if converged < howmany - @warn """Arnoldi eigsolve finished without convergence after $numiter iterations: - * $converged eigenvalues converged - * norm of residuals = $((normresiduals...,)) - * number of operations = $numops""" - else - @info """Arnoldi eigsolve finished after $numiter iterations: - * $converged eigenvalues converged - * norm of residuals = $((normresiduals...,)) - * number of operations = $numops""" + if (converged < howmany) && alg.verbosity >= WARN_LEVEL + @warn """Arnoldi eigsolve stopped without convergence after 
$numiter iterations: + * $converged eigenvalues converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" + elseif alg.verbosity >= STARTSTOP_LEVEL + @info """Arnoldi eigsolve finished after $numiter iterations: + * $converged eigenvalues converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" + end + return values, + vectors, + ConvergenceInfo(converged, residuals, normresiduals, numiter, numops) +end + +""" + # expert version: + realeigsolve(f, x₀, howmany, which, algorithm; alg_rrule=algorithm) + +Compute the first `howmany` eigenvalues (according to the order specified by `which`) +from the real linear map encoded in the matrix `A` or by the function `f`, if it can be +guaranteed that these eigenvalues (and thus their associated eigenvectors) are real. An +error will be thrown if there are complex eigenvalues within the first `howmany` eigenvalues. + +Return eigenvalues, eigenvectors and a `ConvergenceInfo` structure. + +!!! note "Note about real linear maps" + + A function `f` is said to implement a real linear map if it satisfies + `f(add(x,y)) = add(f(x), f(y))` and `f(scale(x, α)) = scale(f(x), α)` for vectors `x` + and `y` and scalars `α::Real`. Note that this is possible even when the vectors are + represented using complex arithmetic. For example, the map `f = x -> x + conj(x)` + represents a real linear map that is not (complex) linear, as it does not satisfy + `f(scale(x, α)) = scale(f(x), α)` for complex scalars `α`. Note that complex linear + maps are always real linear maps and thus can be used in this context, if looking + specifically for real eigenvalues that they may have. + + To interpret the vectors `x` and `y` as elements from a real vector space, the standard + inner product defined on them will be replaced with `real(inner(x,y))`. This has no + effect if the vectors `x` and `y` were represented using real arithmetic to begin with, + and allows complex vectors to be used seamlessly as well. + +### Arguments: + +The linear map can be an `AbstractMatrix` (dense or sparse) or a general function or +callable object. A starting vector `x₀` needs to be provided. Note that `x₀` does not need +to be of type `AbstractVector`; any type that behaves as a vector and supports the required +interface (see KrylovKit docs) is accepted. + +The argument `howmany` specifies how many eigenvalues should be computed; `which` specifies +which eigenvalues should be targeted. Valid specifications of `which` for real +problems are given by + + - `:LM`: eigenvalues of largest magnitude + - `:LR`: eigenvalues with largest (most positive) real part + - `:SR`: eigenvalues with smallest (most negative) real part + - [`EigSorter(f; rev = false)`](@ref): eigenvalues `λ` that appear first (or last if + `rev == true`) when sorted by `f(λ)` + +!!! note "Note about selecting `which` eigenvalues" + + Krylov methods work well for extremal eigenvalues, i.e. eigenvalues on the periphery of + the spectrum of the linear map. All of the valid `Symbol`s for `which` have this + property, but could also be specified using `EigSorter`, e.g. `:LM` is equivalent to + `EigSorter(abs; rev = true)`. Note that smallest magnitude sorting is obtained using + e.g. `EigSorter(abs; rev = false)`, but since no (shift-and)-invert is used, this will + only be successful if you somehow know that eigenvalues close to zero are also close + to the periphery of the spectrum. + +!!!
warning "Degenerate eigenvalues" + + From a theoretical point of view, Krylov methods can at most find a single eigenvector + associated with a targetted eigenvalue, even if the latter is degenerate. In the case of + a degenerate eigenvalue, the specific eigenvector that is returned is determined by the + starting vector `x₀`. For large problems, this turns out to be less of an issue in + practice, as often a second linearly independent eigenvector is generated out of the + numerical noise resulting from the orthogonalisation steps in the Lanczos or Arnoldi + iteration. Nonetheless, it is important to take this into account and to try not to + depend on this potentially fragile behaviour, especially for smaller problems. + +The `algorithm` argument currently only supports an instance of [`Arnoldi`](@ref), which +is where the parameters of the Krylov method (such as Krylov dimension and maximum number +of iterations) can be specified. Since `realeigsolve` is less commonly used as `eigsolve`, +it only supports this expert mode call syntax and no convenient keyword interface is +currently available. + +The keyword argument `alg_rrule` can be used to specify an algorithm to be used for computing +the `pullback` of `realeigsolve` in the context of reverse-mode automatic differentation. + +### Return values: + +The return value is always of the form `vals, vecs, info = eigsolve(...)` with + + - `vals`: a `Vector` containing the eigenvalues, of length at least `howmany`, but could + be longer if more eigenvalues were converged at the same cost. Eigenvalues will be real, + an `ArgumentError` will be thrown if the first `howmany` eigenvalues ordered according + to `which` of the linear map are not all real. + - `vecs`: a `Vector` of corresponding eigenvectors, of the same length as `vals`. Note + that eigenvectors are not returned as a matrix, as the linear map could act on any + custom Julia type with vector like behavior, i.e. the elements of the list `vecs` are + objects that are typically similar to the starting guess `x₀`. For a real problem with + real eigenvalues, also the eigenvectors will be real and no complex arithmetic is used + anywhere. + - `info`: an object of type [`ConvergenceInfo`], which has the following fields + + + `info.converged::Int`: indicates how many eigenvalues and eigenvectors were actually + converged to the specified tolerance `tol` (see below under keyword arguments) + + `info.residual::Vector`: a list of the same length as `vals` containing the + residuals `info.residual[i] = f(vecs[i]) - vals[i] * vecs[i]` + + `info.normres::Vector{<:Real}`: list of the same length as `vals` containing the + norm of the residual `info.normres[i] = norm(info.residual[i])` + + `info.numops::Int`: number of times the linear map was applied, i.e. number of times + `f` was called, or a vector was multiplied with `A` + + `info.numiter::Int`: number of times the Krylov subspace was restarted (see below) + +!!! warning "Check for convergence" + + No warning is printed if not all requested eigenvalues were converged, so always check + if `info.converged >= howmany`. 
+""" +function realeigsolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi; alg_rrule=alg) + T, U, fact, converged, numiter, numops = _schursolve(A, RealVec(x₀), howmany, which, + alg) + i = 0 + while i < length(fact) + i += 1 + if i < length(fact) && T[i + 1, i] != 0 + i -= 1 + break end end + i < howmany && + throw(ArgumentError("only the first $i eigenvalues are real, which is less then the requested `howmany = $howmany`")) + howmany′ = max(howmany, min(i, converged)) + TT = view(T, 1:howmany′, 1:howmany′) + values = diag(TT) + + # Compute eigenvectors + V = view(U, :, 1:howmany′) * schur2realeigvecs(TT) + vectors = let B = basis(fact) + [(B * v)[] for v in cols(V)] + end + residuals = let r = residual(fact)[] + [scale(r, last(v)) for v in cols(V)] + end + normresiduals = [normres(fact) * abs(last(v)) for v in cols(V)] + + if (converged < howmany) && alg.verbosity >= WARN_LEVEL + @warn """Arnoldi realeigsolve stopped without convergence after $numiter iterations: + * $converged eigenvalues converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" + elseif alg.verbosity >= STARTSTOP_LEVEL + @info """Arnoldi realeigsolve finished after $numiter iterations: + * $converged eigenvalues converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" + end return values, vectors, ConvergenceInfo(converged, residuals, normresiduals, numiter, numops) @@ -177,7 +342,7 @@ function _schursolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi) numiter = 1 # initialize arnoldi factorization iter = ArnoldiIterator(A, x₀, alg.orth) - fact = initialize(iter; verbosity=alg.verbosity - 2) + fact = initialize(iter; verbosity=alg.verbosity) numops = 1 sizehint!(fact, krylovdim) β = normres(fact) @@ -195,10 +360,11 @@ function _schursolve(A, x₀, howmany::Int, which::Selector, alg::Arnoldi) β = normres(fact) K = length(fact) - if β <= tol - if K < howmany - @warn "Invariant subspace of dimension $K (up to requested tolerance `tol = $tol`), which is smaller than the number of requested eigenvalues (i.e. `howmany == $howmany`); setting `howmany = $K`." - howmany = K + if β <= tol && K < howmany + if alg.verbosity >= WARN_LEVEL + msg = "Invariant subspace of dimension $K (up to requested tolerance `tol = $tol`), " + msg *= "which is smaller than the number of requested eigenvalues (i.e. `howmany == $howmany`)." 
+ + @warn msg + end + end + if K == krylovdim || β <= tol || (alg.eager && K >= howmany) # process @@ -224,33 +390,34 @@ converged -= 1 end - if converged >= howmany + if converged >= howmany || β <= tol break - elseif alg.verbosity > 1 - msg = "Arnoldi schursolve in iter $numiter, krylovdim = $K: " - msg *= "$converged values converged, normres = (" - msg *= @sprintf("%.2e", abs(f[1])) - for i in 2:howmany - msg *= ", " - msg *= @sprintf("%.2e", abs(f[i])) - end - msg *= ")" - @info msg + elseif alg.verbosity >= EACHITERATION_LEVEL + @info "Arnoldi schursolve in iteration $numiter, step = $K: $converged values converged, normres = $(normres2string(abs.(f[1:howmany])))" + end + end + if K < krylovdim # expand - fact = expand!(iter, fact; verbosity=alg.verbosity - 2) + fact = expand!(iter, fact; verbosity=alg.verbosity) numops += 1 else # shrink numiter == maxiter && break # Determine how many to keep keep = div(3 * krylovdim + 2 * converged, 5) # strictly smaller than krylovdim since converged < howmany <= krylovdim, at least equal to converged - if eltype(H) <: Real && H[keep + 1, keep] != 0 # we are in the middle of a 2x2 block - keep += 1 # conservative choice - keep >= krylovdim && - error("krylov dimension $(krylovdim) too small to compute $howmany eigenvalues") + if eltype(H) <: Real && H[keep + 1, keep] != 0 + # we are in the middle of a 2x2 block; this cannot happen if keep == converged, so we can decrease keep + # however, we have to make sure that we do not end up with keep = 0 + if keep > 1 + keep -= 1 # conservative choice + else + keep += 1 + if krylovdim == 2 + alg.verbosity >= WARN_LEVEL && + @warn "Arnoldi iteration got stuck in a 2x2 block, consider increasing the Krylov dimension" + break + end + end end # Restore Arnoldi form in the first keep columns @@ -274,7 +441,7 @@ B[keep + 1] = scale!!(r, 1 / normres(fact)) # Shrink Arnoldi factorization - fact = shrink!(fact, keep) + fact = shrink!(fact, keep; verbosity=alg.verbosity) numiter += 1 end end diff --git a/src/eigsolve/eigsolve.jl index 786b4f39..5d63ab9d 100644 --- a/src/eigsolve/eigsolve.jl +++ b/src/eigsolve/eigsolve.jl @@ -3,7 +3,7 @@ eigsolve(f, n::Int, [howmany = 1, which = :LM, T = Float64]; kwargs...) eigsolve(f, x₀, [howmany = 1, which = :LM]; kwargs...) # expert version: - eigsolve(f, x₀, howmany, which, algorithm) + eigsolve(f, x₀, howmany, which, algorithm; alg_rrule=...) Compute at least `howmany` eigenvalues from the linear map encoded in the matrix `A` or by the function `f`. Return eigenvalues, eigenvectors and a `ConvergenceInfo` structure. @@ -12,12 +12,12 @@ the function `f`. Return eigenvalues, eigenvectors and a `ConvergenceInfo` struc The linear map can be an `AbstractMatrix` (dense or sparse) or a general function or callable object. If an `AbstractMatrix` is used, a starting vector `x₀` does not need to be -provided, it is then chosen as `rand(T, size(A,1))`. If the linear map is encoded more +provided, it is then chosen as `rand(T, size(A, 1))`. If the linear map is encoded more generally as a callable function or method, the best approach is to provide an explicit starting guess `x₀`. Note that `x₀` does not need to be of type `AbstractVector`; any type that behaves as a vector and supports the required methods (see KrylovKit docs) is accepted.
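For example (a hypothetical callable map, chosen only to show that no explicit matrix is needed; `x₀` then fixes the vector and scalar type):

```julia
using KrylovKit

n = 100
d = collect(range(1, 2; length=n))
f = x -> d .* x .+ 0.1 .* circshift(x, 1)   # linear map given as a function
x₀ = randn(n)

vals, vecs, info = eigsolve(f, x₀, 2, :LM)
```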
If instead of `x₀` an integer `n` is specified, it is assumed that `x₀` is a regular vector -and it is initialized to `rand(T,n)`, where the default value of `T` is `Float64`, unless +and it is initialized to `rand(T, n)`, where the default value of `T` is `Float64`, unless specified differently. The next arguments are optional, but should typically be specified. `howmany` specifies how @@ -98,8 +98,12 @@ The return value is always of the form `vals, vecs, info = eigsolve(...)` with Keyword arguments and their default values are given by: - - `verbosity::Int = 0`: verbosity level, i.e. 0 (no messages), 1 (single message - at the end), 2 (information after every iteration), 3 (information per Krylov step) + - `verbosity::Int = 0`: verbosity level, i.e. + - 0 (suppress all messages) + - 1 (only warnings) + - 2 (one message with convergence info at the end) + - 3 (progress info after every iteration) + - 4+ (all of the above and additional information about the Lanczos or Arnoldi iteration) - `tol::Real`: the requested accuracy (corresponding to the 2-norm of the residual for Schur vectors, not the eigenvectors). If you work in e.g. single precision (`Float32`), you should definitely change the default value. @@ -129,6 +133,14 @@ is used, `issymmetric` and `ishermitian` are checked for that matrix, otherwise values are `issymmetric = false` and `ishermitian = T <: Real && issymmetric`. When values for the keyword arguments are provided, no checks will be performed even in the matrix case. +The final keyword argument `alg_rrule` is relevant only when `eigsolve` is used in a setting +where reverse-mode automatic differentiation will be used. A custom `ChainRulesCore.rrule` is +defined for `eigsolve`, which can be evaluated using different algorithms that can be specified +via `alg_rrule`. A suitable default is chosen, so this keyword argument should only be used +when this default choice fails or does not perform efficiently. Check the documentation for +more information on the possible values for `alg_rrule` and their implications on the algorithm +being used. + ### Algorithm The final (expert) method, without default values and keyword arguments, is the one that is @@ -149,6 +161,8 @@ restarts are so-called thick restarts where a part of the current Krylov subspac See also [`schursolve`](@ref) if you want to use the partial Schur decomposition directly, or if you are not interested in computing the eigenvectors, and want to work in real arithmetic all the way through (if the linear map and starting guess are real). + If you have knowledge that all requested eigenvalues of a real problem will be real, + and thus also their associated eigenvectors, you can also use [`realeigsolve`](@ref). """ function eigsolve end @@ -172,7 +186,8 @@ function eigsolve(A::AbstractMatrix, which::Selector=:LM, T::Type=eltype(A); kwargs...) - return eigsolve(A, rand(T, size(A, 1)), howmany, which; kwargs...) + x₀ = Random.rand!(similar(A, T, size(A, 1))) + return eigsolve(A, x₀, howmany, which; kwargs...) end function eigsolve(f, n::Int, howmany::Int=1, which::Selector=:LM, T::Type=Float64; @@ -190,27 +205,36 @@ function eigsolve(f, x₀, howmany::Int=1, which::Selector=:LM; kwargs...)
error("Eigenvalue selector which = $which invalid: real eigenvalues expected with Lanczos algorithm") end elseif T <: Real - if which == :LI || - which == :SI || - (which isa EigSorter && which.by(+im) != which.by(-im)) + by, rev = eigsort(which) + if by(+im) != by(-im) error("Eigenvalue selector which = $which invalid because it does not treat `λ` and `conj(λ)` equally: work in complex arithmetic by providing a complex starting vector `x₀`") end end - return eigsolve(f, x₀, howmany, which, alg) + if haskey(kwargs, :alg_rrule) + alg_rrule = kwargs[:alg_rrule] + else + alg_rrule = Arnoldi(; tol=alg.tol, + krylovdim=alg.krylovdim, + maxiter=alg.maxiter, + eager=alg.eager, + orth=alg.orth) + end + return eigsolve(f, x₀, howmany, which, alg; alg_rrule=alg_rrule) end function eigselector(f, T::Type; issymmetric::Bool=false, - ishermitian::Bool=issymmetric && !(T <: Complex), - krylovdim::Int=KrylovDefaults.krylovdim, - maxiter::Int=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, + ishermitian::Bool=issymmetric && (T <: Real), + krylovdim::Int=KrylovDefaults.krylovdim[], + maxiter::Int=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], orth::Orthogonalizer=KrylovDefaults.orth, eager::Bool=false, - verbosity::Int=0) - if (issymmetric && !(T <: Complex)) || ishermitian + verbosity::Int=KrylovDefaults.verbosity[], + alg_rrule=nothing) + if (T <: Real && issymmetric) || ishermitian return Lanczos(; krylovdim=krylovdim, maxiter=maxiter, tol=tol, @@ -230,12 +254,13 @@ function eigselector(A::AbstractMatrix, T::Type; issymmetric::Bool=T <: Real && LinearAlgebra.issymmetric(A), ishermitian::Bool=issymmetric || LinearAlgebra.ishermitian(A), - krylovdim::Int=KrylovDefaults.krylovdim, - maxiter::Int=KrylovDefaults.maxiter, - tol::Real=KrylovDefaults.tol, + krylovdim::Int=KrylovDefaults.krylovdim[], + maxiter::Int=KrylovDefaults.maxiter[], + tol::Real=KrylovDefaults.tol[], orth::Orthogonalizer=KrylovDefaults.orth, eager::Bool=false, - verbosity::Int=0) + verbosity::Int=KrylovDefaults.verbosity[], + alg_rrule=nothing) if (T <: Real && issymmetric) || ishermitian return Lanczos(; krylovdim=krylovdim, maxiter=maxiter, diff --git a/src/eigsolve/geneigsolve.jl b/src/eigsolve/geneigsolve.jl index ddddbe8e..f27cb643 100644 --- a/src/eigsolve/geneigsolve.jl +++ b/src/eigsolve/geneigsolve.jl @@ -88,8 +88,11 @@ The return value is always of the form `vals, vecs, info = geneigsolve(...)` wit Keyword arguments and their default values are given by: - - `verbosity::Int = 0`: verbosity level, i.e. 0 (no messages), 1 (single message - at the end), 2 (information after every iteration), 3 (information per Krylov step) + - `verbosity::Int = 0`: verbosity level, i.e. + - 0 (suppress all messages) + - 1 (only warnings) + - 2 (one message with convergence info at the end) + - 3 (progress info after every iteration) - `tol::Real`: the requested accuracy, relative to the 2-norm of the corresponding eigenvectors, i.e. convergence is achieved if `norm((A - λB)x) < tol * norm(x)`. Because eigenvectors are now normalised such that `dot(x, B*x) = 1`, `norm(x)` is not @@ -150,21 +153,24 @@ function geneigsolve(AB::Tuple{AbstractMatrix,AbstractMatrix}, if !(size(AB[1], 1) == size(AB[1], 2) == size(AB[2], 1) == size(AB[2], 2)) throw(DimensionMismatch("Matrices `A` and `B` should be square and have matching size")) end - return geneigsolve(AB, rand(T, size(AB[1], 1)), howmany::Int, which; kwargs...) + x₀ = Random.rand!(similar(AB[1], T, size(AB[1], 1))) + return geneigsolve(AB, x₀, howmany::Int, which; kwargs...) 
 end
 
 function geneigsolve(AB::Tuple{Any,AbstractMatrix},
                      howmany::Int=1,
                      which::Selector=:LM,
                      T=eltype(AB[2]);
                      kwargs...)
-    return geneigsolve(AB, rand(T, size(AB[2], 1)), howmany, which; kwargs...)
+    x₀ = Random.rand!(similar(AB[2], T, size(AB[2], 1)))
+    return geneigsolve(AB, x₀, howmany, which; kwargs...)
 end
 
 function geneigsolve(AB::Tuple{AbstractMatrix,Any},
                      howmany::Int=1,
                      which::Selector=:LM,
                      T=eltype(AB[1]);
                      kwargs...)
-    return geneigsolve(AB, rand(T, size(AB[1], 1)), howmany, which; kwargs...)
+    x₀ = Random.rand!(similar(AB[1], T, size(AB[1], 1)))
+    return geneigsolve(AB, x₀, howmany, which; kwargs...)
 end
 
 function geneigsolve(f,
diff --git a/src/eigsolve/golubye.jl b/src/eigsolve/golubye.jl
index f6bc0ca5..d1ec449c 100644
--- a/src/eigsolve/golubye.jl
+++ b/src/eigsolve/golubye.jl
@@ -44,10 +44,18 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe)
     K = 1
     HHA[K, K] = real(α)
+    if alg.verbosity >= EACHITERATION_LEVEL + 1
+        @info "Golub-Ye iteration $numiter, step $K: normres = $(normres2string(β))"
+    end
     while true
         β = norm(r)
         if β <= tol && K < howmany
-            @warn "Invariant subspace of dimension $K (up to requested tolerance `tol = $tol`), which is smaller than the number of requested eigenvalues (i.e. `howmany == $howmany`); setting `howmany = $K`."
+            if alg.verbosity >= WARN_LEVEL
+                msg = "Invariant subspace of dimension $K (up to requested tolerance `tol = $tol`), "
+                msg *= "which is smaller than the number of requested eigenvalues (i.e. `howmany == $howmany`); "
+                msg *= "setting `howmany = $K`."
+                @warn msg
+            end
             howmany = K
         end
         if K == krylovdim - converged || β <= tol # process
@@ -98,53 +106,34 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe)
             resize!(vectors, 0)
             resize!(residuals, 0)
             resize!(normresiduals, 0)
-            while converged < K
-                z = view(Z, :, p[converged + 1])
-                v = mul!(zerovector(vold), V, z)
+            for k in 1:K
+                z = view(Z, :, p[k])
+                v = unproject!!(zerovector(vold), V, z)
                 av, bv = genapply(f, v)
                 numops += 1
                 ρ = checkhermitian(inner(v, av)) / checkposdef(inner(v, bv))
                 r = add!!(av, bv, -ρ)
                 β = norm(r)
-                if β > tol * norm(z)
-                    break
+                if β < tol * norm(z)
+                    converged += 1
+                elseif numiter < maxiter
+                    break # in last iteration, keep adding nonconverged vectors up to howmany
                 end
-
                 push!(values, ρ)
                 push!(vectors, v)
                 push!(residuals, r)
                 push!(normresiduals, β)
-                converged += 1
+                if (k == howmany && numiter == maxiter)
+                    break
+                end
             end
-
             if converged >= howmany
                 howmany = converged
                 break
-            elseif numiter == maxiter
-                for k in (converged + 1):howmany
-                    z = view(Z, :, p[k])
-                    v = mul!(zerovector(vold), V, z)
-                    av, bv = genapply(f, v)
-                    numops += 1
-                    ρ = checkhermitian(inner(v, av)) / checkposdef(inner(v, bv))
-                    r = add!!(av, bv, -ρ)
-                    β = norm(r)
-
-                    push!(values, ρ)
-                    push!(vectors, v)
-                    push!(residuals, r)
-                    push!(normresiduals, β)
-                end
-            elseif alg.verbosity > 1
-                msg = "Golub-Ye geneigsolve in iter $numiter: "
-                msg *= "$converged values converged, normres = ("
-                for i in 1:converged
-                    msg *= @sprintf("%.2e", normresiduals[i])
-                    msg *= ", "
-                end
-                msg *= @sprintf("%.2e", β) * ")"
-                @info msg
+            end
+            if alg.verbosity >= EACHITERATION_LEVEL
+                @info "Golub-Ye geneigsolve in iter $numiter: $converged values converged, normres = $(normres2string(normresiduals))"
             end
         end
@@ -162,8 +151,8 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe)
             HHA[K, K] = checkhermitian(α, n)
             push!(BV, bv)
 
-            if alg.verbosity > 2
-                @info "Golub-Ye iteration $numiter, step $K: normres = $β"
+            if alg.verbosity >=
EACHITERATION_LEVEL + 1 + @info "Golub-Ye iteration $numiter, step $K: normres = $(normres2string(β))" end else # restart numiter == maxiter && break @@ -183,20 +172,21 @@ function geneigsolve(f, x₀, howmany::Int, which::Selector, alg::GolubYe) HHA[K, K] = real(α) push!(BV, bv) numiter += 1 + if alg.verbosity >= EACHITERATION_LEVEL + 1 + @info "Golub-Ye iteration $numiter, step $K: normres = $(normres2string(β))" + end end end - if alg.verbosity > 0 - if converged < howmany - @warn """Golub-Ye geneigsolve finished without convergence after $numiter iterations: - * $converged eigenvalues converged - * norm of residuals = $((normresiduals...,)) - * number of operations = $numops""" - else - @info """Golub-Ye geneigsolve finished after $numiter iterations: - * $converged eigenvalues converged - * norm of residuals = $((normresiduals...,)) - * number of operations = $numops""" - end + if (converged < howmany) && alg.verbosity >= WARN_LEVEL + @warn """Golub-Ye geneigsolve stopped without convergence after $numiter iterations: + * $converged eigenvalues converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" + elseif alg.verbosity >= STARTSTOP_LEVEL + @info """Golub-Ye geneigsolve finished after $numiter iterations: + * $converged eigenvalues converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" end return values, diff --git a/src/eigsolve/lanczos.jl b/src/eigsolve/lanczos.jl index 097c7e12..b23f98fd 100644 --- a/src/eigsolve/lanczos.jl +++ b/src/eigsolve/lanczos.jl @@ -1,14 +1,19 @@ -function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Lanczos) +function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Lanczos; + alg_rrule=Arnoldi(; tol=alg.tol, + krylovdim=alg.krylovdim, + maxiter=alg.maxiter, + eager=alg.eager, + orth=alg.orth)) krylovdim = alg.krylovdim maxiter = alg.maxiter - howmany > krylovdim && + if howmany > krylovdim error("krylov dimension $(krylovdim) too small to compute $howmany eigenvalues") + end ## FIRST ITERATION: setting up - # Initialize Lanczos factorization iter = LanczosIterator(A, x₀, alg.orth) - fact = initialize(iter; verbosity=alg.verbosity - 2) + fact = initialize(iter; verbosity=alg.verbosity) numops = 1 numiter = 1 sizehint!(fact, krylovdim) @@ -26,10 +31,11 @@ function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Lanczos) K = length(fact) # diagonalize Krylov factorization - if β <= tol - if K < howmany - @warn "Invariant subspace of dimension $K (up to requested tolerance `tol = $tol`), which is smaller than the number of requested eigenvalues (i.e. `howmany == $howmany`); setting `howmany = $K`." - howmany = K + if β <= tol && K < howmany + if alg.verbosity >= WARN_LEVEL + msg = "Invariant subspace of dimension $K (up to requested tolerance `tol = $tol`), " + msg *= "which is smaller than the number of requested eigenvalues (i.e. `howmany == $howmany`)." 
+ @warn msg end end if K == krylovdim || β <= tol || (alg.eager && K >= howmany) @@ -57,23 +63,15 @@ function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Lanczos) end end - if converged >= howmany + if converged >= howmany || β <= tol break - elseif alg.verbosity > 1 - msg = "Lanczos eigsolve in iter $numiter, krylovdim = $K: " - msg *= "$converged values converged, normres = (" - msg *= @sprintf("%.2e", abs(f[1])) - for i in 2:howmany - msg *= ", " - msg *= @sprintf("%.2e", abs(f[i])) - end - msg *= ")" - @info msg + elseif alg.verbosity >= EACHITERATION_LEVEL + @info "Lanczos eigsolve in iteration $numiter, step = $K: $converged values converged, normres = $(normres2string(abs.(f[1:howmany])))" end end - if K < krylovdim# expand Krylov factorization - fact = expand!(iter, fact; verbosity=alg.verbosity - 2) + if K < krylovdim # expand Krylov factorization + fact = expand!(iter, fact; verbosity=alg.verbosity) numops += 1 else ## shrink and restart if numiter == maxiter @@ -109,18 +107,21 @@ function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Lanczos) B[keep + 1] = scale!!(r, 1 / β) # Shrink Lanczos factorization - fact = shrink!(fact, keep) + fact = shrink!(fact, keep; verbosity=alg.verbosity) numiter += 1 end end + howmany′ = howmany if converged > howmany - howmany = converged + howmany′ = converged + elseif length(D) < howmany + howmany′ = length(D) end - values = D[1:howmany] + values = D[1:howmany′] # Compute eigenvectors - V = view(U, :, 1:howmany) + V = view(U, :, 1:howmany′) # Compute convergence information vectors = let B = basis(fact) @@ -130,21 +131,19 @@ function eigsolve(A, x₀, howmany::Int, which::Selector, alg::Lanczos) [scale(r, last(v)) for v in cols(V)] end normresiduals = let f = f - map(i -> abs(f[i]), 1:howmany) + map(i -> abs(f[i]), 1:howmany′) end - if alg.verbosity > 0 - if converged < howmany - @warn """Lanczos eigsolve finished without convergence after $numiter iterations: - * $converged eigenvalues converged - * norm of residuals = $((normresiduals...,)) - * number of operations = $numops""" - else - @info """Lanczos eigsolve finished after $numiter iterations: - * $converged eigenvalues converged - * norm of residuals = $((normresiduals...,)) - * number of operations = $numops""" - end + if (converged < howmany) && alg.verbosity >= WARN_LEVEL + @warn """Lanczos eigsolve stopped without convergence after $numiter iterations: + * $converged eigenvalues converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" + elseif alg.verbosity >= STARTSTOP_LEVEL + @info """Lanczos eigsolve finished after $numiter iterations: + * $converged eigenvalues converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" end return values, diff --git a/src/eigsolve/svdsolve.jl b/src/eigsolve/svdsolve.jl index a74bf3df..2ee60f0b 100644 --- a/src/eigsolve/svdsolve.jl +++ b/src/eigsolve/svdsolve.jl @@ -1,9 +1,9 @@ """ svdsolve(A::AbstractMatrix, [x₀, howmany = 1, which = :LR, T = eltype(A)]; kwargs...) svdsolve(f, m::Int, [howmany = 1, which = :LR, T = Float64]; kwargs...) - svdsolve(f, x₀, [howmany = 1, which = :LM]; kwargs...) + svdsolve(f, x₀, [howmany = 1, which = :LR]; kwargs...) # expert version: - svdsolve(f, x₀, howmany, which, algorithm) + svdsolve(f, x₀, howmany, which, algorithm; alg_rrule=...) Compute `howmany` singular values from the linear map encoded in the matrix `A` or by the function `f`. 
 Return singular values, left and right singular vectors and a
@@ -21,7 +21,7 @@ the normal action of the linear map needs to be computed. The latter form still
 works well with the `do` block syntax of Julia, as in
 
 ```julia
-vals, lvecs, rvecs, info = svdsolve(x₀, y₀, howmany, which; kwargs...) do x, flag
+vals, lvecs, rvecs, info = svdsolve(x₀, howmany, which; kwargs...) do x, flag
     if flag === Val(true)
        # y = compute action of adjoint map on x
     else
@@ -36,7 +36,7 @@ approach is to provide a start vector `x₀` (in the codomain, i.e. column space
 linear map). Alternatively, one can specify the number `m` of rows of the linear map, in
 which case `x₀ = rand(T, m)` is used, where the default value of `T` is `Float64`, unless
 specified differently. If an `AbstractMatrix` is used, a starting vector `x₀` does not need
-to be provided; it is chosen as `rand(T, size(A,1))`.
+to be provided; it is chosen as `rand(T, size(A, 1))`.
 
 The next arguments are optional, but should typically be specified. `howmany` specifies how
 many singular values and vectors should be computed; `which` specifies which singular
@@ -57,8 +57,7 @@ The return value is always of the form `vals, lvecs, rvecs, info = svdsolve(...)
  - `rvecs`: a `Vector` of corresponding right singular vectors, of the same length as
    `vals`. Note that singular vectors are not returned as a matrix, as the linear map
    could act on any custom Julia type with vector like behavior, i.e. the elements of the
-    lists `lvecs`(`rvecs`) are objects that are typically similar to the starting guess `y₀`
-    (`x₀`), up to a possibly different `eltype`. When the linear map is a simple
+    lists `lvecs` (`rvecs`) are objects that are typically similar to the starting guess `x₀` (`A' * x₀`), up to a possibly different `eltype`. When the linear map is a simple
    `AbstractMatrix`, `lvecs` and `rvecs` will be `Vector{Vector{<:Number}}`.
  - `info`: an object of type [`ConvergenceInfo`], which has the following fields
@@ -82,8 +81,12 @@ The return value is always of the form `vals, lvecs, rvecs, info = svdsolve(...)
 Keyword arguments and their default values are given by:
 
-  - `verbosity::Int = 0`: verbosity level, i.e. 0 (no messages), 1 (single message
-    at the end), 2 (information after every iteration), 3 (information per Krylov step)
+  - `verbosity::Int = 0`: verbosity level
+    - 0 (suppress all messages)
+    - 1 (only warnings)
+    - 2 (one message with convergence info at the end)
+    - 3 (progress info after every iteration)
+    - 4+ (all of the above and additional information about the GKL iteration)
  - `krylovdim`: the maximum dimension of the Krylov subspace that will be constructed.
    Note that the dimension of the vector space is not known or checked, e.g. `x₀` should
    not necessarily support the `Base.length` function. If you know the actual problem
@@ -98,6 +101,14 @@ Keyword arguments and their default values are given by:
    Krylov subspace to test for convergence, otherwise wait until the Krylov subspace has
    dimension `krylovdim`
 
+The final keyword argument `alg_rrule` is relevant only when `svdsolve` is used in a setting
+where reverse-mode automatic differentiation will be used. A custom `ChainRulesCore.rrule` is
+defined for `svdsolve`, which can be evaluated using different algorithms that can be specified
+via `alg_rrule`. A suitable default is chosen, so this keyword argument should only be used
+when this default choice is failing or not performing efficiently. Check the documentation for
+more information on the possible values for `alg_rrule` and their implications on the algorithm
+being used.
+
 ### Algorithm
 
 The last method, without default values and keyword arguments, is the one that is finally
@@ -114,7 +125,8 @@ function svdsolve(A::AbstractMatrix,
                   which::Selector=:LR, T::Type=eltype(A);
                   kwargs...)
-    return svdsolve(A, rand(T, size(A, 1)), howmany, which; kwargs...)
+    x₀ = Random.rand!(similar(A, T, size(A, 1)))
+    return svdsolve(A, x₀, howmany, which; kwargs...)
 end
 function svdsolve(f, n::Int, howmany::Int=1, which::Selector=:LR, T::Type=Float64;
                   kwargs...)
@@ -129,7 +141,13 @@ function svdsolve(f, x₀, howmany::Int=1, which::Selector=:LR; kwargs...)
     return svdsolve(f, x₀, howmany, which, alg)
 end
 
-function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL)
+function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL;
+                  alg_rrule=Arnoldi(; tol=alg.tol,
+                                    krylovdim=alg.krylovdim,
+                                    maxiter=alg.maxiter,
+                                    eager=alg.eager,
+                                    orth=alg.orth,
+                                    verbosity=alg.verbosity))
     krylovdim = alg.krylovdim
     maxiter = alg.maxiter
     howmany > krylovdim &&
@@ -139,7 +157,7 @@ function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL)
     numiter = 1
     # initialize GKL factorization
     iter = GKLIterator(A, x₀, alg.orth)
-    fact = initialize(iter; verbosity=alg.verbosity - 2)
+    fact = initialize(iter; verbosity=alg.verbosity)
     numops = 2
     sizehint!(fact, krylovdim)
     β = normres(fact)
@@ -157,10 +175,11 @@ function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL)
         β = normres(fact)
         K = length(fact)
 
-        if β < tol
-            if K < howmany
-                @warn "Invariant subspace of dimension $K (up to requested tolerance `tol = $tol`), which is smaller than the number of requested singular values (i.e. `howmany == $howmany`); setting `howmany = $K`."
-                howmany = K
+        if β <= tol && K < howmany
+            if alg.verbosity >= WARN_LEVEL
+                msg = "Invariant subspace of dimension $K (up to requested tolerance `tol = $tol`), "
+                msg *= "which is smaller than the number of requested singular values (i.e. `howmany == $howmany`)."
+ @warn msg end end if K == krylovdim || β <= tol || (alg.eager && K >= howmany) @@ -187,23 +206,15 @@ function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL) converged += 1 end - if converged >= howmany + if converged >= howmany || β <= tol break - elseif alg.verbosity > 1 - msg = "GKL svdsolve in iter $numiter, krylovdim $krylovdim: " - msg *= "$converged values converged, normres = (" - msg *= @sprintf("%.2e", abs(f[1])) - for i in 2:howmany - msg *= ", " - msg *= @sprintf("%.2e", abs(f[i])) - end - msg *= ")" - @info msg + elseif alg.verbosity >= EACHITERATION_LEVEL + @info "GKL svdsolve in iteration $numiter, step $K: $converged values converged, normres = $(normres2string(abs.(f[1:howmany])))" end end if K < krylovdim # expand - fact = expand!(iter, fact; verbosity=alg.verbosity - 2) + fact = expand!(iter, fact; verbosity=alg.verbosity) numops += 2 else ## shrink and restart if numiter == maxiter @@ -214,14 +225,14 @@ function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL) keep = div(3 * krylovdim + 2 * converged, 5) # strictly smaller than krylovdim since converged < howmany <= krylovdim, at least equal to converged # Update basis by applying P and Q using Householder reflections - U = basis(fact, :U) + U = basis(fact, Val(:U)) basistransform!(U, view(P, :, 1:keep)) # for j = 1:m # h, ν = householder(P, j:m, j) # lmul!(h, view(P, :, j+1:krylovdim)) # rmul!(U, h') # end - V = basis(fact, :V) + V = basis(fact, Val(:V)) basistransform!(V, view(Q', :, 1:keep)) # for j = 1:m # h, ν = householder(Q, j, j:m) @@ -256,7 +267,7 @@ function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL) fact.βs[j] = H[j + 1, j] end # Shrink GKL factorization - fact = shrink!(fact, keep) + fact = shrink!(fact, keep; verbosity=alg.verbosity) numiter += 1 end end @@ -270,10 +281,10 @@ function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL) Qv = view(Q, 1:howmany, :) # Compute convergence information - leftvectors = let U = basis(fact, :U) + leftvectors = let U = basis(fact, Val(:U)) [U * v for v in cols(Pv)] end - rightvectors = let V = basis(fact, :V) + rightvectors = let V = basis(fact, Val(:V)) [V * v for v in cols(Qv')] end residuals = let r = residual(fact) @@ -282,18 +293,16 @@ function svdsolve(A, x₀, howmany::Int, which::Symbol, alg::GKL) normresiduals = let f = f map(i -> abs(f[i]), 1:howmany) end - if alg.verbosity > 0 - if converged < howmany - @warn """GKL svdsolve finished without convergence after $numiter iterations: - * $converged singular values converged - * norm of residuals = $((normresiduals...,)) - * number of operations = $numops""" - else - @info """GKL svdsolve finished after $numiter iterations: - * $converged singular values converged - * norm of residuals = $((normresiduals...,)) - * number of operations = $numops""" - end + if (converged < howmany) && alg.verbosity >= WARN_LEVEL + @warn """GKL svdsolve finished without convergence after $numiter iterations: + * $converged singular values converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" + elseif alg.verbosity >= STARTSTOP_LEVEL + @info """GKL svdsolve finished after $numiter iterations: + * $converged singular values converged + * norm of residuals = $(normres2string(normresiduals)) + * number of operations = $numops""" end return values, diff --git a/src/factorizations/arnoldi.jl b/src/factorizations/arnoldi.jl index b02a87fd..e6cd344b 100644 --- a/src/factorizations/arnoldi.jl +++ b/src/factorizations/arnoldi.jl @@ -133,16 +133,17 @@ function 
Base.iterate(iter::ArnoldiIterator, state) end end -function initialize(iter::ArnoldiIterator; verbosity::Int=0) +function initialize(iter::ArnoldiIterator; verbosity::Int=KrylovDefaults.verbosity[]) # initialize without using eltype x₀ = iter.x₀ β₀ = norm(x₀) iszero(β₀) && throw(ArgumentError("initial vector should not have norm zero")) Ax₀ = apply(iter.operator, x₀) α = inner(x₀, Ax₀) / (β₀ * β₀) - T = typeof(α) + T = typeof(α) # scalar type of the Rayleigh quotient # this line determines the vector type that we will henceforth use - v = add!!(zerovector(Ax₀, T), x₀, 1 / β₀) + # vector scalar type can be different from `T`, e.g. for real inner products + v = add!!(scale(Ax₀, zero(α)), x₀, 1 / β₀) if typeof(Ax₀) != typeof(v) r = add!!(zerovector(v), Ax₀, 1 / β₀) else @@ -168,12 +169,13 @@ function initialize(iter::ArnoldiIterator; verbosity::Int=0) end V = OrthonormalBasis([v]) H = T[α, β] - if verbosity > 0 - @info "Arnoldi iteration step 1: normres = $β" + if verbosity > EACHITERATION_LEVEL + @info "Arnoldi initiation at dimension 1: subspace normres = $(normres2string(β))" end return state = ArnoldiFactorization(1, V, H, r) end -function initialize!(iter::ArnoldiIterator, state::ArnoldiFactorization; verbosity::Int=0) +function initialize!(iter::ArnoldiIterator, state::ArnoldiFactorization; + verbosity::Int=KrylovDefaults.verbosity[]) x₀ = iter.x₀ V = state.V while length(V) > 1 @@ -188,12 +190,13 @@ function initialize!(iter::ArnoldiIterator, state::ArnoldiFactorization; verbosi state.k = 1 push!(H, α, β) state.r = r - if verbosity > 0 - @info "Arnoldi iteration step 1: normres = $β" + if verbosity > EACHITERATION_LEVEL + @info "Arnoldi initiation at dimension 1: subspace normres = $(normres2string(β))" end return state end -function expand!(iter::ArnoldiIterator, state::ArnoldiFactorization; verbosity::Int=0) +function expand!(iter::ArnoldiIterator, state::ArnoldiFactorization; + verbosity::Int=KrylovDefaults.verbosity[]) state.k += 1 k = state.k V = state.V @@ -206,12 +209,12 @@ function expand!(iter::ArnoldiIterator, state::ArnoldiFactorization; verbosity:: r, β = arnoldirecurrence!!(iter.operator, V, view(H, (m + 1):(m + k)), iter.orth) H[m + k + 1] = β state.r = r - if verbosity > 0 - @info "Arnoldi iteration step $k: normres = $β" + if verbosity > EACHITERATION_LEVEL + @info "Arnoldi expansion to dimension $k: subspace normres = $(normres2string(β))" end return state end -function shrink!(state::ArnoldiFactorization, k) +function shrink!(state::ArnoldiFactorization, k; verbosity::Int=KrylovDefaults.verbosity[]) length(state) <= k && return state V = state.V H = state.H @@ -221,7 +224,11 @@ function shrink!(state::ArnoldiFactorization, k) r = pop!(V) resize!(H, (k * k + 3 * k) >> 1) state.k = k - state.r = scale!!(r, normres(state)) + β = normres(state) + if verbosity > EACHITERATION_LEVEL + @info "Arnoldi reduction to dimension $k: subspace normres = $(normres2string(β))" + end + state.r = scale!!(r, β) return state end diff --git a/src/factorizations/gkl.jl b/src/factorizations/gkl.jl index 02c42e37..7e3380e9 100644 --- a/src/factorizations/gkl.jl +++ b/src/factorizations/gkl.jl @@ -11,8 +11,8 @@ A' * U = V * B' ``` For a given GKL factorization `fact` of length `k = length(fact)`, the two bases `U` and `V` -are obtained via [`basis(fact, :U)`](@ref basis) and `basis(fact, :V)`. Here, `U` and `V` -are instances of [`OrthonormalBasis{T}`](@ref Basis), with also +are obtained via [`basis(fact, Val(:U))`](@ref basis) and `basis(fact, Val(:V))`. 
Here, +`U` and `V` are instances of [`OrthonormalBasis{T}`](@ref Basis), with also `length(U) == length(V) == k` and where `T` denotes the type of vector like objects used in the problem. The Rayleigh quotient `B` is obtained as [`rayleighquotient(fact)`](@ref) and is of type `Bidiagonal{S<:Number}` with `size(B) == (k,k)`. The residual `r` is @@ -50,8 +50,8 @@ Base.eltype(F::GKLFactorization) = eltype(typeof(F)) Base.eltype(::Type{<:GKLFactorization{<:Any,<:Any,S}}) where {S} = S # iteration for destructuring into components -Base.iterate(F::GKLFactorization) = (basis(F, :U), Val(:V)) -Base.iterate(F::GKLFactorization, ::Val{:V}) = (basis(F, :V), Val(:rayleighquotient)) +Base.iterate(F::GKLFactorization) = (basis(F, Val(:U)), Val(:V)) +Base.iterate(F::GKLFactorization, ::Val{:V}) = (basis(F, Val(:V)), Val(:rayleighquotient)) function Base.iterate(F::GKLFactorization, ::Val{:rayleighquotient}) return (rayleighquotient(F), Val(:residual)) end @@ -63,17 +63,17 @@ end Base.iterate(F::GKLFactorization, ::Val{:done}) = nothing """ - basis(fact::GKLFactorization, which::Symbol) + basis(fact::GKLFactorization, ::Val{which}) Return the list of basis vectors of a [`GKLFactorization`](@ref), where `which` should take the value `:U` or `:V` and indicates which set of basis vectors (in the domain or in the codomain of the corresponding linear map) should be returned. The return type is an `OrthonormalBasis{T}`, where `T` represents the type of the vectors used by the problem. """ -function basis(F::GKLFactorization, which::Symbol) +function basis(F::GKLFactorization, ::Val{UV}) where {UV} length(F.U) == F.k || error("Not keeping vectors during GKL bidiagonalization") - which == :U || which == :V || error("invalid flag for specifying basis") - return which == :U ? F.U : F.V + UV == :U || UV == :V || error("invalid flag for specifying basis") + return UV == :U ? F.U : F.V end function rayleighquotient(F::GKLFactorization) return Bidiagonal(view(F.αs, 1:(F.k)), view(F.βs, 1:(F.k - 1)), :L) @@ -106,7 +106,7 @@ possibly uses reorthogonalization steps. 
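As a quick illustration of the `Val`-based `basis` accessor introduced above, a GKL factorization can be built up and queried as follows (a minimal sketch: the matrix size and number of expansion steps are made up, and the three-argument `GKLIterator` constructor with the package's default orthogonalizer is used, as in the `svdsolve` driver above):

```julia
using KrylovKit

A = randn(40, 30)
iter = GKLIterator(A, randn(40), KrylovDefaults.orth)
fact = initialize(iter)
while length(fact) < 10
    fact = expand!(iter, fact)
end
U = basis(fact, Val(:U))    # left basis vectors, in the codomain of A
V = basis(fact, Val(:V))    # right basis vectors, in the domain of A
B = rayleighquotient(fact)  # lower bidiagonal Rayleigh quotient, size (10, 10)
```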
 When iterating over an instance of `GKLIterator`, the values being generated are instances
 `fact` of [`GKLFactorization`](@ref), which can be immediately destructured into a
-[`basis(fact, :U)`](@ref), [`basis(fact, :V)`](@ref), [`rayleighquotient`](@ref),
+[`basis(fact, Val(:U))`](@ref), [`basis(fact, Val(:V))`](@ref), [`rayleighquotient`](@ref),
 [`residual`](@ref), [`normres`](@ref) and [`rayleighextension`](@ref), for example as
 
 ```julia
@@ -183,7 +183,7 @@ function Base.iterate(iter::GKLIterator, state::GKLFactorization)
     end
 end
 
-function initialize(iter::GKLIterator; verbosity::Int=0)
+function initialize(iter::GKLIterator; verbosity::Int=KrylovDefaults.verbosity[])
     # initialize without using eltype
     u₀ = iter.u₀
     β₀ = norm(u₀)
@@ -193,10 +193,10 @@ function initialize(iter::GKLIterator; verbosity::Int=0)
     Av₀ = apply_normal(iter.operator, v₀) # apply operator
     α² = inner(u₀, Av₀) / β₀^2
     α² ≈ α * α || throw(ArgumentError("operator and its adjoint are not compatible"))
-    T = typeof(α²)
-    # these lines determines the type that we will henceforth use
+    T = typeof(α²) # scalar type of the Rayleigh quotient
-    u = scale!!(zerovector(u₀, T), u₀, 1 / β₀) # (one(T) / β₀) * u₀
+    # these lines determine the vector types that we will henceforth use
+    u = scale(u₀, one(T) / β₀)
     v = scale(v₀, one(T) / (α * β₀))
     if typeof(Av₀) == typeof(u)
         r = scale!!(Av₀, 1 / (α * β₀))
@@ -211,13 +211,13 @@ function initialize(iter::GKLIterator; verbosity::Int=0)
     S = real(T)
     αs = S[α]
     βs = S[β]
-    if verbosity > 0
-        @info "GKL iteration step 1: normres = $β"
+    if verbosity > EACHITERATION_LEVEL
+        @info "GKL initiation at dimension 1: subspace normres = $(normres2string(β))"
     end
-
     return GKLFactorization(1, U, V, αs, βs, r)
 end
-function initialize!(iter::GKLIterator, state::GKLFactorization; verbosity::Int=0)
+function initialize!(iter::GKLIterator, state::GKLFactorization;
+                     verbosity::Int=KrylovDefaults.verbosity[])
     U = state.U
     while length(U) > 1
         pop!(U)
@@ -239,13 +239,13 @@ function initialize!(iter::GKLIterator, state::GKLFactorization; verbosity::Int=
     push!(αs, α)
     push!(βs, β)
     state.r = r
-    if verbosity > 0
-        @info "GKL iteration step 1: normres = $β"
+    if verbosity > EACHITERATION_LEVEL
+        @info "GKL initiation at dimension 1: subspace normres = $(normres2string(β))"
     end
-
     return state
 end
-function expand!(iter::GKLIterator, state::GKLFactorization; verbosity::Int=0)
+function expand!(iter::GKLIterator, state::GKLFactorization;
+                 verbosity::Int=KrylovDefaults.verbosity[])
     βold = normres(state)
     U = state.U
     V = state.V
@@ -261,13 +261,12 @@ function expand!(iter::GKLIterator, state::GKLFactorization; verbosity::Int=0)
     state.k += 1
     state.r = r
-    if verbosity > 0
-        @info "GKL iteration step $(state.k): normres = $β"
+    if verbosity > EACHITERATION_LEVEL
+        @info "GKL expansion to dimension $(state.k): subspace normres = $(normres2string(β))"
     end
-
     return state
 end
-function shrink!(state::GKLFactorization, k)
+function shrink!(state::GKLFactorization, k; verbosity::Int=KrylovDefaults.verbosity[])
     length(state) == length(state.V) ||
         error("we cannot shrink GKLFactorization without keeping vectors")
     length(state) <= k && return state
@@ -282,7 +281,11 @@ function shrink!(state::GKLFactorization, k)
     resize!(state.αs, k)
     resize!(state.βs, k)
     state.k = k
-    state.r = scale!!(r, normres(state))
+    β = normres(state)
+    if verbosity > EACHITERATION_LEVEL
+        @info "GKL reduction to dimension $k: subspace normres = $(normres2string(β))"
+    end
+    state.r = scale!!(r, β)
     return state
 end
diff --git a/src/factorizations/lanczos.jl
b/src/factorizations/lanczos.jl index ccf3ef54..31d9375b 100644 --- a/src/factorizations/lanczos.jl +++ b/src/factorizations/lanczos.jl @@ -164,57 +164,59 @@ function Base.iterate(iter::LanczosIterator, state::LanczosFactorization) end end -function initialize(iter::LanczosIterator; verbosity::Int=0) +function warn_nonhermitian(α, β₁, β₂) + n = hypot(α, β₁, β₂) + if abs(imag(α)) / n > eps(one(n))^(2 / 5) + @warn "ignoring imaginary component $(imag(α)) from total weight $n: operator might not be hermitian?" α β₁ β₂ + end + return nothing +end + +function initialize(iter::LanczosIterator; verbosity::Int=KrylovDefaults.verbosity[]) # initialize without using eltype x₀ = iter.x₀ β₀ = norm(x₀) iszero(β₀) && throw(ArgumentError("initial vector should not have norm zero")) Ax₀ = apply(iter.operator, x₀) α = inner(x₀, Ax₀) / (β₀ * β₀) - n = abs(α) - imag(α) <= sqrt(max(eps(n), eps(one(n)))) || - error("operator does not appear to be hermitian: $(imag(α)) vs $n") - T = typeof(α) + T = typeof(α) # scalar type of the Rayleigh quotient # this line determines the vector type that we will henceforth use - v = add!!(zerovector(Ax₀, T), x₀, 1 / β₀) + # vector scalar type can be different from `T`, e.g. for real inner products + v = add!!(scale(Ax₀, zero(α)), x₀, 1 / β₀) if typeof(Ax₀) != typeof(v) r = add!!(zerovector(v), Ax₀, 1 / β₀) else r = scale!!(Ax₀, 1 / β₀) end βold = norm(r) - r = add!!(r, v, -α) + r = add!!(r, v, -α) # should we use real(α) here? β = norm(r) # possibly reorthogonalize if iter.orth isa Union{ClassicalGramSchmidt2,ModifiedGramSchmidt2} dα = inner(v, r) - n = hypot(dα, β) - imag(dα) <= sqrt(max(eps(n), eps(one(n)))) || - error("operator does not appear to be hermitian: $(imag(dα)) vs $n") α += dα - r = add!!(r, v, -dα) + r = add!!(r, v, -dα) # should we use real(dα) here? β = norm(r) elseif iter.orth isa Union{ClassicalGramSchmidtIR,ModifiedGramSchmidtIR} while eps(one(β)) < β < iter.orth.η * βold βold = β dα = inner(v, r) - n = hypot(dα, β) - imag(dα) <= sqrt(max(eps(n), eps(one(n)))) || - error("operator does not appear to be hermitian: $(imag(dα)) vs $n") α += dα - r = add!!(r, v, -dα) + r = add!!(r, v, -dα) # should we use real(dα) here? 
β = norm(r) end end + verbosity >= WARN_LEVEL && warn_nonhermitian(α, zero(β), β) V = OrthonormalBasis([v]) αs = [real(α)] βs = [β] - if verbosity > 0 - @info "Lanczos iteration step 1: normres = $β" + if verbosity > EACHITERATION_LEVEL + @info "Lanczos initiation at dimension 1: subspace normres = $(normres2string(β))" end return LanczosFactorization(1, V, αs, βs, r) end -function initialize!(iter::LanczosIterator, state::LanczosFactorization; verbosity::Int=0) +function initialize!(iter::LanczosIterator, state::LanczosFactorization; + verbosity::Int=KrylovDefaults.verbosity[]) x₀ = iter.x₀ V = state.V while length(V) > 1 @@ -227,28 +229,25 @@ function initialize!(iter::LanczosIterator, state::LanczosFactorization; verbosi w = apply(iter.operator, V[1]) r, α = orthogonalize!!(w, V[1], iter.orth) β = norm(r) - n = hypot(α, β) - imag(α) <= sqrt(max(eps(n), eps(one(n)))) || - error("operator does not appear to be hermitian: $(imag(α)) vs $n") + verbosity >= WARN_LEVEL && warn_nonhermitian(α, zero(β), β) state.k = 1 push!(αs, real(α)) push!(βs, β) state.r = r - if verbosity > 0 - @info "Lanczos iteration step 1: normres = $β" + if verbosity > EACHITERATION_LEVEL + @info "Lanczos initiation at dimension 1: subspace normres = $(normres2string(β))" end return state end -function expand!(iter::LanczosIterator, state::LanczosFactorization; verbosity::Int=0) +function expand!(iter::LanczosIterator, state::LanczosFactorization; + verbosity::Int=KrylovDefaults.verbosity[]) βold = normres(state) V = state.V r = state.r V = push!(V, scale!!(r, 1 / βold)) r, α, β = lanczosrecurrence(iter.operator, V, βold, iter.orth) - n = hypot(α, β, βold) - imag(α) <= sqrt(max(eps(n), eps(one(n)))) || - error("operator does not appear to be hermitian: $(imag(α)) vs $n") + verbosity >= WARN_LEVEL && warn_nonhermitian(α, βold, β) αs = push!(state.αs, real(α)) βs = push!(state.βs, β) @@ -257,12 +256,12 @@ function expand!(iter::LanczosIterator, state::LanczosFactorization; verbosity:: state.k += 1 state.r = r - if verbosity > 0 - @info "Lanczos iteration step $(state.k): normres = $β" + if verbosity > EACHITERATION_LEVEL + @info "Lanczos expansion to dimension $(state.k): subspace normres = $(normres2string(β))" end return state end -function shrink!(state::LanczosFactorization, k) +function shrink!(state::LanczosFactorization, k; verbosity::Int=KrylovDefaults.verbosity[]) length(state) == length(state.V) || error("we cannot shrink LanczosFactorization without keeping Lanczos vectors") length(state) <= k && return state @@ -274,7 +273,11 @@ function shrink!(state::LanczosFactorization, k) resize!(state.αs, k) resize!(state.βs, k) state.k = k - state.r = scale!!(r, normres(state)) + β = normres(state) + if verbosity > EACHITERATION_LEVEL + @info "Lanczos reduction to dimension $k: subspace normres = $(normres2string(β))" + end + state.r = scale!!(r, β) return state end diff --git a/src/innerproductvec.jl b/src/innerproductvec.jl index e61de6bb..f4f6951f 100644 --- a/src/innerproductvec.jl +++ b/src/innerproductvec.jl @@ -89,17 +89,31 @@ end function VectorInterface.scale(v::InnerProductVec, a::Number) return InnerProductVec(scale(v.vec, a), v.dotf) end - +function VectorInterface.scale!!(v::InnerProductVec, a::Number) + return InnerProductVec(scale!!(v.vec, a), v.dotf) +end function VectorInterface.scale!(v::InnerProductVec, a::Number) scale!(v.vec, a) return v end +function VectorInterface.scale!!(w::InnerProductVec{F}, v::InnerProductVec{F}, + a::Number) where {F} + return InnerProductVec(scale!!(w.vec, v.vec, a), 
w.dotf) +end function VectorInterface.scale!(w::InnerProductVec{F}, v::InnerProductVec{F}, a::Number) where {F} scale!(w.vec, v.vec, a) return w end +function VectorInterface.add(v::InnerProductVec{F}, w::InnerProductVec{F}, a::Number, + b::Number) where {F} + return InnerProductVec(add(v.vec, w.vec, a, b), v.dotf) +end +function VectorInterface.add!!(v::InnerProductVec{F}, w::InnerProductVec{F}, a::Number, + b::Number) where {F} + return InnerProductVec(add!!(v.vec, w.vec, a, b), v.dotf) +end function VectorInterface.add!(v::InnerProductVec{F}, w::InnerProductVec{F}, a::Number, b::Number) where {F} add!(v.vec, w.vec, a, b) diff --git a/src/linsolve/bicgstab.jl b/src/linsolve/bicgstab.jl index 2ff7f87d..da469811 100644 --- a/src/linsolve/bicgstab.jl +++ b/src/linsolve/bicgstab.jl @@ -1,4 +1,4 @@ -function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number=1) +function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number=1; alg_rrule=alg) # Initial function operation and division defines number type y₀ = apply(operator, x₀) T = typeof(inner(b, y₀) / norm(b) * one(a₀) * one(a₁)) @@ -20,12 +20,14 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number # Check for early return if normr < tol - if alg.verbosity > 0 + if alg.verbosity >= STARTSTOP_LEVEL @info """BiCGStab linsolve converged without any iterations: - * norm of residual = $normr - * number of operations = 1""" + * norm of residual = $(normres2string(normr)) + * number of operations = $numops""" end return (x, ConvergenceInfo(1, r, normr, numiter, numops)) + elseif alg.verbosity >= STARTSTOP_LEVEL + @info "BiCGStab linsolve starts with norm of residual = $(normres2string(normr))" end # First iteration @@ -35,9 +37,11 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number # Method fails if ρ is zero. 
if ρ ≈ 0.0 - @warn """BiCGStab linsolve errored after $numiter iterations: - * norm of residual = $normr - * number of operations = $numops""" + if alg.verbosity >= WARN_LEVEL + @warn """BiCGStab linsolve errored after $numiter iterations: + * norm of residual = $(normres2string(normr)) + * number of operations = $numops""" + end return (x, ConvergenceInfo(0, r, normr, numiter, numops)) end @@ -66,10 +70,10 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number normr_act = norm(s) if normr_act < tol - if alg.verbosity > 0 + if alg.verbosity >= STARTSTOP_LEVEL @info """BiCGStab linsolve converged at iteration $(numiter-1/2): - * norm of residual = $normr_act - * number of operations = $numops""" + * norm of residual = $(normres2string(normr_act)) + * number of operations = $numops""" end return (xhalf, ConvergenceInfo(1, s, normr_act, numiter, numops)) end @@ -97,23 +101,19 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number normr_act = norm(r) if normr_act < tol - if alg.verbosity > 0 + if alg.verbosity >= STARTSTOP_LEVEL @info """BiCGStab linsolve converged at iteration $(numiter): - * norm of residual = $normr_act - * number of operations = $numops""" + * norm of residual = $(normres2string(normr_act)) + * number of operations = $numops""" end return (x, ConvergenceInfo(1, r, normr_act, numiter, numops)) end end + if alg.verbosity >= EACHITERATION_LEVEL + @info "BiCGStab linsolve in iteration $numiter: normres = $(normres2string(normr))" + end - while numiter < maxiter - if alg.verbosity > 0 - msg = "BiCGStab linsolve in iter $numiter: " - msg *= "normres = " - msg *= @sprintf("%12e", normr) - @info msg - end - + while true numiter += 1 ρold = ρ ρ = inner(r_shadow, r) @@ -136,13 +136,6 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number normr = norm(s) - if alg.verbosity > 0 - msg = "BiCGStab linsolve in iter $(numiter-1/2): " - msg *= "normres = " - msg *= @sprintf("%12e", normr) - @info msg - end - # Check for return at half step. if normr < tol # Compute non-approximate residual. @@ -152,14 +145,17 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number normr_act = norm(s) if normr_act < tol - if alg.verbosity > 0 + if alg.verbosity >= STARTSTOP_LEVEL @info """BiCGStab linsolve converged at iteration $(numiter-1/2): - * norm of residual = $normr_act - * number of operations = $numops""" + * norm of residual = $(normres2string(normr_act)) + * number of operations = $numops""" end return (xhalf, ConvergenceInfo(1, s, normr_act, numiter, numops)) end end + if alg.verbosity >= EACHITERATION_LEVEL + @info "BiCGStab linsolve in iteration $(numiter-1/2): normres = $(normres2string(normr))" + end ## GMRES part of the algorithm. 
t = apply(operator, s, α₀, α₁) @@ -183,20 +179,24 @@ function linsolve(operator, b, x₀, alg::BiCGStab, a₀::Number=0, a₁::Number normr_act = norm(r) if normr_act < tol - if alg.verbosity > 0 + if alg.verbosity >= STARTSTOP_LEVEL @info """BiCGStab linsolve converged at iteration $(numiter): - * norm of residual = $normr_act - * number of operations = $numops""" + * norm of residual = $(normres2string(normr_act)) + * number of operations = $numops""" end return (x, ConvergenceInfo(1, r, normr_act, numiter, numops)) end end + if numiter >= maxiter + if alg.verbosity >= WARN_LEVEL + @warn """BiCGStab linsolve stopped without converging after $numiter iterations: + * norm of residual = $(normres2string(normr)) + * number of operations = $numops""" + end + return (x, ConvergenceInfo(0, r, normr, numiter, numops)) + end + if alg.verbosity >= EACHITERATION_LEVEL + @info "BiCGStab linsolve in iteration $numiter: normres = $(normres2string(normr))" + end end - - if alg.verbosity > 0 - @warn """BiCGStab linsolve finished without converging after $numiter iterations: - * norm of residual = $normr - * number of operations = $numops""" - end - return (x, ConvergenceInfo(0, r, normr, numiter, numops)) end diff --git a/src/linsolve/cg.jl b/src/linsolve/cg.jl index 9270b1c3..0b5eeac6 100644 --- a/src/linsolve/cg.jl +++ b/src/linsolve/cg.jl @@ -1,4 +1,4 @@ -function linsolve(operator, b, x₀, alg::CG, a₀::Real=0, a₁::Real=1) +function linsolve(operator, b, x₀, alg::CG, a₀::Real=0, a₁::Real=1; alg_rrule=alg) # Initial function operation and division defines number type y₀ = apply(operator, x₀) T = typeof(inner(b, y₀) / norm(b) * one(a₀) * one(a₁)) @@ -19,7 +19,16 @@ function linsolve(operator, b, x₀, alg::CG, a₀::Real=0, a₁::Real=1) numiter = 0 # Check for early return - normr < tol && return (x, ConvergenceInfo(1, r, normr, numiter, numops)) + if normr < tol + if alg.verbosity >= STARTSTOP_LEVEL + @info """CG linsolve converged without any iterations: + * norm of residual = $(normres2string(normr)) + * number of operations = 1""" + end + return (x, ConvergenceInfo(1, r, normr, numiter, numops)) + elseif alg.verbosity >= STARTSTOP_LEVEL + @info "CG linsolve starts with norm of residual = $(normres2string(normr))" + end # First iteration ρ = normr^2 @@ -34,17 +43,23 @@ function linsolve(operator, b, x₀, alg::CG, a₀::Real=0, a₁::Real=1) β = ρ / ρold numops += 1 numiter += 1 - if alg.verbosity > 1 - msg = "CG linsolve in iter $numiter: " - msg *= "normres = " - msg *= @sprintf("%.12e", normr) - @info msg + + if normr < tol + if alg.verbosity >= STARTSTOP_LEVEL + @info """CG linsolve converged at iteration $numiter: + * norm of residual = $(normres2string(normr)) + * number of operations = $numops""" + end + return (x, ConvergenceInfo(1, r, normr, numiter, numops)) + end + if alg.verbosity >= EACHITERATION_LEVEL + @info "CG linsolve in iteration $numiter: normres = $(normres2string(normr))" end # Check for early return normr < tol && return (x, ConvergenceInfo(1, r, normr, numiter, numops)) - while numiter < maxiter + while true p = add!!(p, r, 1, β) q = apply(operator, p, α₀, α₁) α = ρ / inner(p, q) @@ -62,27 +77,26 @@ function linsolve(operator, b, x₀, alg::CG, a₀::Real=0, a₁::Real=1) ρ = normr^2 β = ρ / ρold end + numops += 1 + numiter += 1 if normr < tol - if alg.verbosity > 0 + if alg.verbosity >= STARTSTOP_LEVEL @info """CG linsolve converged at iteration $numiter: - * norm of residual = $normr - * number of operations = $numops""" + * norm of residual = $(normres2string(normr)) + * number of operations = 
$numops""" end return (x, ConvergenceInfo(1, r, normr, numiter, numops)) end - numops += 1 - numiter += 1 - if alg.verbosity > 1 - msg = "CG linsolve in iter $numiter: " - msg *= "normres = " - msg *= @sprintf("%.12e", normr) - @info msg + if numiter >= maxiter + if alg.verbosity >= WARN_LEVEL + @warn """CG linsolve stopped without converging after $numiter iterations: + * norm of residual = $(normres2string(normr)) + * number of operations = $numops""" + end + return (x, ConvergenceInfo(0, r, normr, numiter, numops)) + end + if alg.verbosity >= EACHITERATION_LEVEL + @info "CG linsolve in iteration $numiter: normres = $(normres2string(normr))" end end - if alg.verbosity > 0 - @warn """CG linsolve finished without converging after $numiter iterations: - * norm of residual = $normr - * number of operations = $numops""" - end - return (x, ConvergenceInfo(0, r, normr, numiter, numops)) end diff --git a/src/linsolve/gmres.jl b/src/linsolve/gmres.jl index 129c43db..45619bc1 100644 --- a/src/linsolve/gmres.jl +++ b/src/linsolve/gmres.jl @@ -1,4 +1,4 @@ -function linsolve(operator, b, x₀, alg::GMRES, a₀::Number=0, a₁::Number=1) +function linsolve(operator, b, x₀, alg::GMRES, a₀::Number=0, a₁::Number=1; alg_rrule=alg) # Initial function operation and division defines number type y₀ = apply(operator, x₀) T = typeof(inner(b, y₀) / norm(b) * one(a₀) * one(a₁)) @@ -19,12 +19,14 @@ function linsolve(operator, b, x₀, alg::GMRES, a₀::Number=0, a₁::Number=1) # Check for early return if β < tol - if alg.verbosity > 0 + if alg.verbosity >= STARTSTOP_LEVEL @info """GMRES linsolve converged without any iterations: - * norm of residual = $β - * number of operations = 1""" + * norm of residual = $(normres2string(β)) + * number of operations = 1""" end return (x, ConvergenceInfo(1, r, β, 0, 1)) + elseif alg.verbosity >= STARTSTOP_LEVEL + @info "GMRES linsolve starts with norm of residual = $(normres2string(β))" end # Initialize data structures @@ -32,13 +34,14 @@ function linsolve(operator, b, x₀, alg::GMRES, a₀::Number=0, a₁::Number=1) gs = Vector{Givens{T}}(undef, krylovdim) R = fill(zero(T), (krylovdim, krylovdim)) numiter = 0 - numops = 1 # operator has been applied once to determine T + numops = 1 # operator has been applied once to determine T and r iter = ArnoldiIterator(operator, r, alg.orth) - fact = initialize(iter) + fact = initialize(iter; verbosity=0) + sizehint!(fact, alg.krylovdim) numops += 1 # start applies operator once - while numiter < maxiter # restart loop + while true # restart loop numiter += 1 y[1] = β k = 1 @@ -48,15 +51,12 @@ function linsolve(operator, b, x₀, alg::GMRES, a₀::Number=0, a₁::Number=1) y[2] = zero(T) lmul!(gs[1], y) β = convert(S, abs(y[2])) - if alg.verbosity > 2 - msg = "GMRES linsolve in iter $numiter; step $k: " - msg *= "normres = " - msg *= @sprintf("%.12e", β) - @info msg - end while (β > tol && length(fact) < krylovdim) # inner arnoldi loop - fact = expand!(iter, fact) + if alg.verbosity >= EACHITERATION_LEVEL + @info "GMRES linsolve in iteration $numiter; step $k: normres = $(normres2string(β))" + end + fact = expand!(iter, fact; verbosity=0) numops += 1 # expand! 
applies the operator once k = length(fact) H = rayleighquotient(fact) @@ -82,18 +82,6 @@ function linsolve(operator, b, x₀, alg::GMRES, a₀::Number=0, a₁::Number=1) # New error β = convert(S, abs(y[k + 1])) - if alg.verbosity > 2 - msg = "GMRES linsolve in iter $numiter; step $k: " - msg *= "normres = " - msg *= @sprintf("%.12e", β) - @info msg - end - end - if alg.verbosity > 1 - msg = "GMRES linsolve in iter $numiter; finished at step $k: " - msg *= "normres = " - msg *= @sprintf("%.12e", β) - @info msg end # Solve upper triangular system @@ -122,24 +110,28 @@ function linsolve(operator, b, x₀, alg::GMRES, a₀::Number=0, a₁::Number=1) numops += 1 β = norm(r) if β < tol - if alg.verbosity > 0 + if alg.verbosity >= STARTSTOP_LEVEL @info """GMRES linsolve converged at iteration $numiter, step $k: - * norm of residual = $β - * number of operations = $numops""" + * norm of residual = $(normres2string(β)) + * number of operations = $numops""" end return (x, ConvergenceInfo(1, r, β, numiter, numops)) end end + if numiter >= maxiter + if alg.verbosity >= WARN_LEVEL + @warn """GMRES linsolve stopped without converging after $numiter iterations: + * norm of residual = $(normres2string(β)) + * number of operations = $numops""" + end + return (x, ConvergenceInfo(0, r, β, numiter, numops)) + end + if alg.verbosity >= EACHITERATION_LEVEL + @info "GMRES linsolve in iteration $numiter; step $k: normres = $(normres2string(β))" + end # Restart Arnoldi factorization with new r iter = ArnoldiIterator(operator, r, alg.orth) - fact = initialize!(iter, fact) - end - - if alg.verbosity > 0 - @warn """GMRES linsolve finished without converging after $numiter iterations: - * norm of residual = $β - * number of operations = $numops""" + fact = initialize!(iter, fact; verbosity=0) end - return (x, ConvergenceInfo(0, r, β, numiter, numops)) end diff --git a/src/linsolve/linsolve.jl b/src/linsolve/linsolve.jl index 6b4359f4..41500de0 100644 --- a/src/linsolve/linsolve.jl +++ b/src/linsolve/linsolve.jl @@ -2,7 +2,7 @@ linsolve(A::AbstractMatrix, b::AbstractVector, [x₀, a₀::Number = 0, a₁::Number = 1]; kwargs...) linsolve(f, b, [x₀, a₀::Number = 0, a₁::Number = 1]; kwargs...) # expert version: - linsolve(f, b, x₀, algorithm, [a₀::Number = 0, a₁::Number = 1]) + linsolve(f, b, x₀, algorithm, [a₀::Number = 0, a₁::Number = 1]; alg_rrule=algorithm) Compute a solution `x` to the linear system `(a₀ + a₁ * A)*x = b` or `a₀ * x + a₁ * f(x) = b`, possibly using a starting guess `x₀`. Return the approximate @@ -22,7 +22,7 @@ type `x` and `b`. The return value is always of the form `x, info = linsolve(...)` with - `x`: the approximate solution to the problem, similar type as the right hand side `b` - but possibly with a different `eltype` + but possibly with a different `scalartype` - `info`: an object of type [`ConvergenceInfo`], which has the following fields @@ -43,8 +43,11 @@ The return value is always of the form `x, info = linsolve(...)` with Keyword arguments are given by: - - `verbosity::Int = 0`: verbosity level, i.e. 0 (no messages), 1 (single message - at the end), 2 (information after every iteration), 3 (information per Krylov step) + - `verbosity::Int = 0`: verbosity level, i.e. + - 0 (suppress all messages) + - 1 (only warnings) + - 2 (information at the beginning and end) + - 3 (progress info after every iteration) - `atol::Real`: the requested accuracy, i.e. absolute tolerance, on the norm of the residual. 
  - `rtol::Real`: the requested accuracy on the norm of the residual, relative to the norm
@@ -62,9 +65,9 @@ Keyword arguments are given by:
  - `ishermitian::Bool`: if the linear map is hermitian
  - `isposdef::Bool`: if the linear map is positive definite
 
-The default values are given by `atol = KrylovDefaults.tol`, `rtol = KrylovDefaults.tol`,
-`tol = max(atol, rtol*norm(b))`, `krylovdim = KrylovDefaults.krylovdim`,
-`maxiter = KrylovDefaults.maxiter`, `orth = KrylovDefaults.orth`;
+The default values are given by `atol = KrylovDefaults.tol[]`, `rtol = KrylovDefaults.tol[]`,
+`tol = max(atol, rtol*norm(b))`, `krylovdim = KrylovDefaults.krylovdim[]`,
+`maxiter = KrylovDefaults.maxiter[]`, `orth = KrylovDefaults.orth`;
 see [`KrylovDefaults`](@ref) for details.
 
 The default value for the last three parameters depends on the method. If an
@@ -72,11 +75,20 @@ The default value for the last three parameters depends on the method. If an
 matrix, otherwise the default values are `issymmetric = false`,
 `ishermitian = T <: Real && issymmetric` and `isposdef = false`.
 
+The final keyword argument `alg_rrule` is relevant only when `linsolve` is used in a setting
+where reverse-mode automatic differentiation will be used. A custom `ChainRulesCore.rrule` is
+defined for `linsolve`, which can be evaluated using different algorithms that can be specified
+via `alg_rrule`. As the pullback of `linsolve` involves solving a linear system with the
+(Hermitian) adjoint of the linear map, the default value is to use the same algorithm. This
+keyword argument should only be used when this default choice is failing or not performing
+efficiently. Check the documentation for more information on the possible values for
+`alg_rrule` and their implications on the algorithm being used.
+
 ### Algorithms
 
 The final (expert) method, without default values and keyword arguments, is the one that is
 finally called, and can also be used directly. Here, one specifies the algorithm explicitly.
-Currently, only [`CG`](@ref), [`GMRES`](@ref) and [`BiCGStab`](@ref) are implemented, where
+Currently, only [`CG`](@ref), [`GMRES`](@ref), [`BiCGStab`](@ref) and [`LSMR`](@ref) are implemented, where
 `CG` is chosen if `isposdef == true` and `GMRES` is chosen otherwise. Note that in standard
 `GMRES` terminology, our parameter `krylovdim` is referred to as the *restart* parameter,
 and our `maxiter` parameter counts the number of outer iterations, i.e. restart cycles. In
@@ -102,7 +114,12 @@ function linsolve(f, b, x₀, a₀::Number=0, a₁::Number=1; kwargs...)
     T = promote_type(Core.Compiler.return_type(inner, Tuple{Tb,Tfx}), typeof(a₀),
                      typeof(a₁))
     alg = linselector(f, b, T; kwargs...)
-    return linsolve(f, b, x₀, alg, a₀, a₁)
+    if haskey(kwargs, :alg_rrule)
+        alg_rrule = kwargs[:alg_rrule]
+    else
+        alg_rrule = alg
+    end
+    return linsolve(f, b, x₀, alg, a₀, a₁; alg_rrule=alg_rrule)
 end
 
 function linselector(f,
@@ -111,30 +128,26 @@ function linselector(f,
                      issymmetric::Bool=false,
                      ishermitian::Bool=T <: Real && issymmetric,
                      isposdef::Bool=false,
-                     krylovdim::Int=KrylovDefaults.krylovdim,
-                     maxiter::Int=KrylovDefaults.maxiter,
-                     rtol::Real=KrylovDefaults.tol,
-                     atol::Real=KrylovDefaults.tol,
+                     krylovdim::Int=KrylovDefaults.krylovdim[],
+                     maxiter::Int=KrylovDefaults.maxiter[],
+                     rtol::Real=KrylovDefaults.tol[],
+                     atol::Real=KrylovDefaults.tol[],
                      tol::Real=max(atol, rtol * norm(b)),
                      orth=KrylovDefaults.orth,
-                     verbosity::Int=0)
+                     verbosity::Int=KrylovDefaults.verbosity[])
     if (T <: Real && issymmetric) || ishermitian
-        isposdef &&
+        if isposdef
             return CG(; maxiter=krylovdim * maxiter, tol=tol, verbosity=verbosity)
-        # TODO: implement MINRES for symmetric but not posdef; for now use GRMES
-        # return MINRES(krylovdim*maxiter, tol=tol)
-        return GMRES(; krylovdim=krylovdim,
-                     maxiter=maxiter,
-                     tol=tol,
-                     orth=orth,
-                     verbosity=verbosity)
-    else
-        return GMRES(; krylovdim=krylovdim,
-                     maxiter=maxiter,
-                     tol=tol,
-                     orth=orth,
-                     verbosity=verbosity)
+        else
+            # TODO: implement MINRES for symmetric but not posdef; for now use GMRES
+            # return MINRES(krylovdim*maxiter, tol=tol)
+        end
     end
+    return GMRES(; krylovdim=krylovdim,
+                 maxiter=maxiter,
+                 tol=tol,
+                 orth=orth,
+                 verbosity=verbosity)
 end
 
 function linselector(A::AbstractMatrix,
                      b,
@@ -142,28 +155,100 @@ function linselector(A::AbstractMatrix,
                      issymmetric::Bool=T <: Real && LinearAlgebra.issymmetric(A),
                      ishermitian::Bool=issymmetric || LinearAlgebra.ishermitian(A),
                      isposdef::Bool=ishermitian ? LinearAlgebra.isposdef(A) : false,
-                     krylovdim::Int=KrylovDefaults.krylovdim,
-                     maxiter::Int=KrylovDefaults.maxiter,
-                     rtol::Real=KrylovDefaults.tol,
-                     atol::Real=KrylovDefaults.tol,
+                     krylovdim::Int=KrylovDefaults.krylovdim[],
+                     maxiter::Int=KrylovDefaults.maxiter[],
+                     rtol::Real=KrylovDefaults.tol[],
+                     atol::Real=KrylovDefaults.tol[],
                      tol::Real=max(atol, rtol * norm(b)),
                      orth=KrylovDefaults.orth,
-                     verbosity::Int=0)
+                     verbosity::Int=KrylovDefaults.verbosity[])
    if (T <: Real && issymmetric) || ishermitian
-        isposdef &&
+        if isposdef
            return CG(; maxiter=krylovdim * maxiter, tol=tol, verbosity=verbosity)
-        # TODO: implement MINRES for symmetric but not posdef; for now use GRMES
-        # return MINRES(krylovdim*maxiter, tol=tol, verbosity = verbosity)
-        return GMRES(; krylovdim=krylovdim,
-                     maxiter=maxiter,
-                     tol=tol,
-                     orth=orth,
-                     verbosity=verbosity)
-    else
-        return GMRES(; krylovdim=krylovdim,
-                     maxiter=maxiter,
-                     tol=tol,
-                     orth=orth,
-                     verbosity=verbosity)
+        else
+            # TODO: implement MINRES for symmetric but not posdef; for now use GMRES
+            # return MINRES(krylovdim*maxiter, tol=tol)
+        end
    end
+    return GMRES(; krylovdim=krylovdim,
+                 maxiter=maxiter,
+                 tol=tol,
+                 orth=orth,
+                 verbosity=verbosity)
+end
+
+"""
+    reallinsolve(f, b, x₀, algorithm, [a₀::Real = 0, a₁::Real = 1]; alg_rrule=algorithm)
+
+Compute a solution `x` to the linear system `a₀ * x + a₁ * f(x) = b`, using a starting guess
+`x₀`, where `f` represents a real linear map.
+Return the approximate solution `x` and a `ConvergenceInfo` structure.
+
+!!! note "Note about real linear maps"
+
+    A function `f` is said to implement a real linear map if it satisfies
+    `f(add(x,y)) = add(f(x), f(y))` and `f(scale(x, α)) = scale(f(x), α)` for vectors `x`
+    and `y` and scalars `α::Real`. Note that this is possible even when the vectors are
+    represented using complex arithmetic. For example, the map `f = x -> x + conj(x)`
+    represents a real linear map that is not (complex) linear, as it does not satisfy
+    `f(scale(x, α)) = scale(f(x), α)` for complex scalars `α`. Note that complex linear
+    maps are always real linear maps and thus can be used in this context, though in that
+    case `linsolve` and `reallinsolve` target the same solution. However, they still compute
+    that solution using different arithmetic, and in that case `linsolve` might be more
+    efficient.
+
+    To interpret the vectors `x` and `y` as elements from a real vector space, the standard
+    inner product defined on them will be replaced with `real(inner(x,y))`. This has no
+    effect if the vectors `x` and `y` were represented using real arithmetic to begin with,
+    and allows one to seamlessly use complex vectors as well.
+
+
+### Arguments:
+
+The linear map can be an `AbstractMatrix` (dense or sparse) or a general function or
+callable object. The real numbers `a₀` and `a₁` are optional arguments; they are applied
+implicitly, i.e. they do not contribute to the computation time of applying the linear map or
+to the number of operations on vectors of type `x` and `b`.
+
+### Return values:
+
+The return value is always of the form `x, info = linsolve(...)` with
+
+  - `x`: the approximate solution to the problem, similar type as the right hand side `b`
+    but possibly with a different `scalartype`
+
+  - `info`: an object of type [`ConvergenceInfo`], which has the following fields
+
+    + `info.converged::Int`: takes value 0 or 1 depending on whether the solution was
+      converged up to the requested tolerance
+    + `info.residual`: residual `b - f(x)` of the approximate solution `x`
+    + `info.normres::Real`: norm of the residual, i.e. `norm(info.residual)`
+    + `info.numops::Int`: total number of times that the linear map was applied, i.e. the
+      number of times that `f` was called, or a vector was multiplied with `A`
+    + `info.numiter::Int`: number of times the Krylov subspace was restarted (see below)
+
+!!! warning "Check for convergence"
+
+    No warning is printed if no converged solution was found, so always check if
+    `info.converged == 1`.
+
+### Algorithms
+
+The final (expert) method, without default values and keyword arguments, is the one that is
+finally called, and can also be used directly. Here, one specifies the algorithm explicitly.
+Currently, only [`CG`](@ref), [`GMRES`](@ref) and [`BiCGStab`](@ref) are implemented, where
+`CG` is chosen if `isposdef == true` and `GMRES` is chosen otherwise. Note that in standard
+`GMRES` terminology, our parameter `krylovdim` is referred to as the *restart* parameter,
+and our `maxiter` parameter counts the number of outer iterations, i.e. restart cycles. In
+`CG`, the Krylov subspace is only implicit because short recurrence relations are being
+used, and therefore no restarts are required. Therefore, we pass `krylovdim*maxiter` as the
+maximal number of CG iterations that can be used by the `CG` algorithm.
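For concreteness, a minimal sketch of calling this expert method (the map, sizes and tolerances below are made up; `f` is real-linear but not complex-linear because of the `conj`):

```julia
using KrylovKit

f = x -> x + 0.5 * conj(x)          # real-linear map on complex vectors
b = randn(ComplexF64, 20)
x₀ = zero(b)
alg = GMRES(; tol=1e-12, krylovdim=20, maxiter=5)

x, info = reallinsolve(f, b, x₀, alg)  # solves f(x) = b, i.e. a₀ = 0, a₁ = 1
info.converged == 1 || @warn "no converged solution"
```

Since `f` only commutes with real scalars, plain `linsolve` is not applicable to this map; that is exactly the use case that `reallinsolve` targets.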
+""" +function reallinsolve(f, b, x₀, alg, a₀::Real=0, a₁::Real=1) + x, info = linsolve(f, RealVec(b), RealVec(x₀), alg, a₀, a₁) + + newinfo = ConvergenceInfo(info.converged, info.residual[], info.normres, info.numiter, + info.numops) + return x[], newinfo end diff --git a/src/lssolve/lsmr.jl b/src/lssolve/lsmr.jl new file mode 100644 index 00000000..6e204de5 --- /dev/null +++ b/src/lssolve/lsmr.jl @@ -0,0 +1,146 @@ +function lssolve(operator, b, alg::LSMR, λ_::Real=0) + # Initialisation: determine number type + u₀ = b + v₀ = apply_adjoint(operator, u₀) + T = typeof(inner(v₀, v₀) / inner(u₀, u₀)) + u = scale(u₀, one(T)) + v = scale(v₀, one(T)) + β = norm(u) + S = typeof(β) + u = scale!!(u, 1 / β) + v = scale!!(v, 1 / β) + α = norm(v) + v = scale!!(v, 1 / α) + + V = OrthonormalBasis([v]) + K = alg.krylovdim + sizehint!(V, K) + Vv = zeros(T, K) # storage for reorthogonalization + + # Scalar variables for the bidiagonalization + ᾱ = α + ζ̄ = α * β + ρ = one(S) + θ = zero(S) + ρ̄ = one(S) + c̄ = one(S) + s̄ = zero(S) + + absζ̄ = abs(ζ̄) + + # Vector variables + x = zerovector(v) + h = scale(v, one(T)) # we need h to be a copy of v when we reuse v₀ in the reorthogonalisation + h̄ = zerovector(v) + + r = scale(u, β) + Ah = zerovector(u) + Ah̄ = zerovector(u) + + # Algorithm parameters + numiter = 0 + numops = 1 # One (adjoint) function application for v + maxiter = alg.maxiter + tol::S = alg.tol + λ::S = convert(S, λ_) + + # Check for early return + if absζ̄ < tol + if alg.verbosity > STARTSTOP_LEVEL + @info """LSMR lssolve converged without any iterations: + * ‖b - A * x ‖ = $(normres2string(β)) + * ‖[b - A * x; λ * x] ‖ = $(normres2string(β)) + * ‖ Aᴴ(b - A x) - λ^2 x ‖ = $(normres2string(absζ̄)) + * number of operations = $numops""" + end + return (x, ConvergenceInfo(1, r, absζ̄, numiter, numops)) + elseif alg.verbosity >= STARTSTOP_LEVEL + @info "LSMR lssolve starts with convergence measure ‖ Aᴴ(b - A x) - λ^2 x ‖ = $(normres2string(absζ̄))" + end + + while true + numiter += 1 + Av = apply_normal(operator, v) + Ah = add!!(Ah, Av, 1, -θ / ρ) + + # βₖ₊₁ uₖ₊₁ = A vₖ - αₖ uₖ₊₁ + u = add!!(Av, u, -α) + β = norm(u) + u = scale!!(u, 1 / β) + # αₖ₊₁ vₖ₊₁ = Aᴴ uₖ₊₁ - βₖ₊₁ vₖ + v = add!!(apply_adjoint(operator, u), v, -β) + # Reorthogonalize v against previous vectors + if K > 1 + v, = orthogonalize!!(v, V, view(Vv, 1:min(K, numiter)), alg.orth) + end + + α = norm(v) + v = scale!!(v, 1 / α) + numops += 2 + + # add new vector to subspace at position numiter+1 + if numiter < K + push!(V, v) + else + V[mod1(numiter + 1, K)] = v + end + + # Construct rotation P̂ₖ + α̂ = hypot(ᾱ, λ) # α̂ₖ = sqrt(ᾱₖ^2 + λ^2) + ĉ = ᾱ / α̂ # ĉ = ᾱₖ / α̂ₖ + ŝ = λ / α̂ # ŝₖ = λ / α̂ₖ + + # Use a plane rotation Pₖ to turn Bₖ to Rₖ + ρold = ρ # ρₖ₋₁ + ρ = hypot(α̂, β) # ρₖ + c = α̂ / ρ # cₖ = α̂ₖ / ρₖ + s = β / ρ # sₖ = βₖ₊₁ / ρₖ + θ = s * α # θₖ₊₁ = sₖ * αₖ₊₁ + ᾱ = c * α # ᾱₖ₊₁ = cₖ * αₖ₊₁ + + # Use a plane rotation P̄ₖ to turn Rₖᵀ to R̄ₖ + ρ̄old = ρ̄ # ρ̄ₖ₋₁ + θ̄ = s̄ * ρ # θ̄ₖ = s̄ₖ₋₁ * ρₖ + c̄ρ = c̄ * ρ # c̄ₖ₋₁ * ρₖ + ρ̄ = hypot(c̄ρ, θ) # ρ̄ₖ = sqrt((c̄ₖ₋₁ * ρₖ)^2 + θₖ₊₁^2) + c̄ = c̄ρ / ρ̄ # c̄ₖ = c̄ₖ₋₁ * ρₖ / ρ̄ₖ + s̄ = θ / ρ̄ # s̄ₖ = θₖ₊₁ / ρ̄ₖ + ζ = c̄ * ζ̄ # ζₖ = c̄ₖ * ζ̄_{k} + ζ̄ = -s̄ * ζ̄ # ζ̄ₖ₊₁ = -s̄ₖ * ζ̄ₖ + + # Update h, h̄, x + h̄ = add!!(h̄, h, 1, -θ̄ * ρ / (ρold * ρ̄old)) # h̄ₖ = hₖ - θ̄ₖ * ρₖ / (ρₖ₋₁ * ρ̄ₖ₋₁) * h̄ₖ₋₁ + Ah̄ = add!!(Ah̄, Ah, 1, -θ̄ * ρ / (ρold * ρ̄old)) # h̄ₖ = hₖ - θ̄ₖ * ρₖ / (ρₖ₋₁ * ρ̄ₖ₋₁) * h̄ₖ₋₁ + + x = add!!(x, h̄, ζ / (ρ * ρ̄)) # xₖ = xₖ₋₁ + ζₖ / (ρₖ * ρ̄ₖ) * h̄ₖ + r = add!!(r, Ah̄, -ζ / (ρ 
* ρ̄)) # rₖ = rₖ₋₁ - ζₖ / (ρₖ * ρ̄ₖ) * Ah̄ₖ
+
+        h = add!!(h, v, 1, -θ / ρ) # hₖ₊₁ = vₖ₊₁ - θₖ₊₁ / ρₖ * hₖ
+        # Ah is updated in the next iteration when A v is computed
+
+        absζ̄ = abs(ζ̄)
+        if absζ̄ <= tol
+            if alg.verbosity >= STARTSTOP_LEVEL
+                @info """LSMR lssolve converged at iteration $numiter:
+                * ‖ b - A x ‖ = $(normres2string(norm(r)))
+                * ‖ x ‖ = $(normres2string(norm(x)))
+                * ‖ Aᴴ(b - A x) - λ^2 x ‖ = $(normres2string(absζ̄))
+                * number of operations = $numops"""
+            end
+            return (x, ConvergenceInfo(1, r, absζ̄, numiter, numops))
+        end
+        if numiter >= maxiter
+            if alg.verbosity >= WARN_LEVEL
+                @warn """LSMR lssolve finished without converging after $numiter iterations:
+                * ‖ b - A x ‖ = $(normres2string(norm(r)))
+                * ‖ x ‖ = $(normres2string(norm(x)))
+                * ‖ Aᴴ(b - A x) - λ^2 x ‖ = $(normres2string(absζ̄))
+                * number of operations = $numops"""
+            end
+            return (x, ConvergenceInfo(0, r, absζ̄, numiter, numops))
+        end
+        if alg.verbosity >= EACHITERATION_LEVEL
+            @info "LSMR lssolve in iter $numiter: convergence measure ‖ Aᴴ(b - A x) - λ^2 x ‖ = $(normres2string(absζ̄))"
+        end
+    end
+end
diff --git a/src/lssolve/lssolve.jl b/src/lssolve/lssolve.jl
new file mode 100644
index 00000000..00148617
--- /dev/null
+++ b/src/lssolve/lssolve.jl
@@ -0,0 +1,193 @@
+"""
+    lssolve(A::AbstractMatrix, b::AbstractVector, [λ::Real = 0]; kwargs...)
+    lssolve(f, b, [λ = 0]; kwargs...)
+    # expert version:
+    lssolve(f, b, algorithm, [λ = 0])
+
+Compute a least squares solution `x` to the problem `A * x ≈ b` or `f(x) ≈ b` where `f`
+encodes a linear map, i.e. a solution `x` that minimizes `norm(b - f(x))`.
+Return the approximate solution `x` and a `ConvergenceInfo` structure.
+
+### Arguments:
+
+The linear map can be an `AbstractMatrix` (dense or sparse) or a general function or
+callable object. Since both the action of the linear map and its adjoint are required in
+order to solve the least squares problem, `f` can either be a tuple of two callable objects
+(each accepting a single argument), representing the linear map and its adjoint
+respectively, or `f` can be a single callable object that accepts two input arguments,
+where the second argument is a flag of type `Val{true}` or `Val{false}` that indicates
+whether the adjoint or the normal action of the linear map needs to be computed. The latter
+form still combines well with the `do` block syntax of Julia, as in
+
+```julia
+x, info = lssolve(b; kwargs...) do x, flag
+    if flag === Val(true)
+        # y = compute action of adjoint map on x
+    else
+        # y = compute action of linear map on x
+    end
+    return y
+end
+```
+
+If the linear map `A` or `f` has a nontrivial nullspace, so that different minimisers
+exist, the solution being returned is such that `norm(x)` is minimal. Alternatively, the
+problem can be modified by providing a nonzero value for the optional argument `λ`,
+representing a scalar such that the minimisation problem
+`norm(b - A * x)^2 + λ^2 * norm(x)^2` is solved instead.
+
+!!! info "Starting guess"
+    Note that `lssolve` does not allow one to specify a starting guess `x₀` for the
+    solution. The starting guess is always assumed to be the zero vector in the domain of
+    the linear map, which is found by applying the adjoint action of the linear map to `b`
+    and applying `zerovector` to the result. Given a good initial guess `x₀`, the user can
+    call `lssolve` with a modified right hand side `b - f(x₀)` and add `x₀` to the solution
+    returned by `lssolve`. The resulting vector `x` is a least squares solution to the
+    original problem, but such that `norm(x - x₀)` is minimal, or such that
+    `norm(b - A * x)^2 + λ^2 * norm(x - x₀)^2` is minimised instead.
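+
+A sketch of this shifted-problem trick (the array sizes and the guess below are arbitrary
+illustrations):
+
+```julia
+using KrylovKit
+A = randn(10, 4); b = randn(10); x₀ = randn(4)
+Δx, info = lssolve(A, b - A * x₀)   # solve the least squares problem for the shift
+x = x₀ + Δx                         # least squares solution with norm(x - x₀) minimal
+```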
+
+### Return values:
+
+The return value is always of the form `x, info = lssolve(...)` with
+
+  - `x`: the least squares solution to the problem, as defined above
+
+  - `info`: an object of type [`ConvergenceInfo`], which has the following fields
+
+      + `info.converged::Int`: takes value 0 or 1 depending on whether the solution was
+        converged up to the requested tolerance
+      + `info.residual`: residual `b - A*x` of the approximate solution `x`
+      + `info.normres::Real`: norm of the residual of the normal equations,
+        i.e. the quantity `norm(A'*(b - A*x) - λ^2 * x)` that needs to be smaller
+        than the requested tolerance `tol` in order to have a converged solution
+      + `info.numops::Int`: total number of times that the linear map was applied, i.e. the
+        number of times that `f` was called, or a vector was multiplied with `A` or `A'`
+      + `info.numiter::Int`: total number of iterations of the algorithm
+
+!!! warning "Check for convergence"
+
+    No warning is printed if no converged solution was found, so always check if
+    `info.converged == 1`.
+
+### Keyword arguments:
+
+Keyword arguments are given by:
+
+  - `verbosity::Int = 0`: verbosity level, i.e.
+      - 0 (suppress all messages)
+      - 1 (only warnings)
+      - 2 (information at the beginning and end)
+      - 3 (progress info after every iteration)
+  - `atol::Real`: the requested accuracy, i.e. absolute tolerance, on the norm of the
+    residual.
+  - `rtol::Real`: the requested accuracy on the norm of the residual, relative to the norm
+    of the right hand side `b`.
+  - `tol::Real`: the requested accuracy on the norm of the residual that is actually used
+    by the algorithm; it defaults to `max(atol, rtol*norm(b))`. So either use `atol` and
+    `rtol` or directly use `tol` (in which case the value of `atol` and `rtol` will be
+    ignored).
+  - `maxiter::Integer`: the maximum number of iterations of the algorithm. Every iteration
+    involves one application of the linear map and one application of the adjoint of the
+    linear map.
+
+The default values are given by `atol = KrylovDefaults.tol`, `rtol = KrylovDefaults.tol`,
+`tol = max(atol, rtol*norm(b))`, `maxiter = KrylovDefaults.maxiter`;
+see [`KrylovDefaults`](@ref) for details.
+
+### Algorithms
+
+The final (expert) method, without default values and keyword arguments, is the one that is
+finally called, and can also be used directly. Here, one specifies the algorithm
+explicitly. Currently, only [`LSMR`](@ref) is available and thus selected.
+"""
+function lssolve end
+
+function lssolve(f, b, λ::Real=0;
+                 rtol::Real=KrylovDefaults.tol[],
+                 atol::Real=KrylovDefaults.tol[],
+                 tol::Real=max(atol, rtol * norm(b)),
+                 kwargs...)
+    alg = LSMR(; tol=tol, kwargs...)
+    return lssolve(f, b, alg, λ)
+end
+
+"""
+    reallssolve(f, b, algorithm, [λ::Real = 0])
+
+Compute a least squares solution `x` to the problem `f(x) ≈ b` where `f`
+encodes a real linear map, i.e. a solution `x` that minimizes `norm(b - f(x))`.
+Return the approximate solution `x` and a `ConvergenceInfo` structure.
+
+!!! note "Note about real linear maps"
+
+    A function `f` is said to implement a real linear map if it satisfies
+    `f(add(x,y)) = add(f(x), f(y))` and `f(scale(x, α)) = scale(f(x), α)` for vectors `x`
+    and `y` and scalars `α::Real`. Note that this is possible even when the vectors are
+    represented using complex arithmetic.
+    For example, the map `f = x -> x + conj(x)`
+    represents a real linear map that is not (complex) linear, as it does not satisfy
+    `f(scale(x, α)) = scale(f(x), α)` for complex scalars `α`. Note that complex linear
+    maps are always real linear maps and thus can be used in this context, though in that
+    case `lssolve` and `reallssolve` target the same solution. However, they still compute
+    that solution using different arithmetic, and in that case `lssolve` might be more
+    efficient.
+
+    To interpret the vectors `x` and `y` as elements from a real vector space, the standard
+    inner product defined on them will be replaced with `real(inner(x,y))`. This has no
+    effect if the vectors `x` and `y` were represented using real arithmetic to begin with,
+    and allows complex vectors to be used seamlessly as well.
+
+### Arguments:
+
+The real linear map will typically be a function or callable object, as a matrix can only
+represent a complex linear map and can thus simply be used in combination with `lssolve`.
+Since both the action of the map and its adjoint are required in order to solve the least
+squares problem, `f` can either be a tuple of two callable objects (each accepting a single
+argument), representing the linear map and its adjoint respectively, or `f` can be a single
+callable object that accepts two input arguments, where the second argument is a flag of
+type `Val{true}` or `Val{false}` that indicates whether the adjoint or the normal action of
+the real linear map needs to be computed. The latter form still combines well with the `do`
+block syntax of Julia, as in
+
+```julia
+x, info = reallssolve(b, algorithm) do x, flag
+    if flag === Val(true)
+        # y = compute action of adjoint map on x
+    else
+        # y = compute action of linear map on x
+    end
+    return y
+end
+```
+
+If the real linear map `A` or `f` has a nontrivial nullspace, so that different minimisers
+exist, the solution being returned is such that `norm(x)` is minimal. Alternatively, the
+problem can be modified by providing a nonzero value for the optional argument `λ`,
+representing a scalar such that the minimisation problem
+`norm(b - A * x)^2 + λ^2 * norm(x)^2` is solved instead.
+
+### Return values:
+
+The return value is always of the form `x, info = reallssolve(...)` with
+
+  - `x`: the least squares solution to the problem, as defined above
+
+  - `info`: an object of type [`ConvergenceInfo`], which has the following fields
+
+      + `info.converged::Int`: takes value 0 or 1 depending on whether the solution was
+        converged up to the requested tolerance
+      + `info.residual`: residual `b - A*x` of the approximate solution `x`
+      + `info.normres::Real`: norm of the residual of the normal equations,
+        i.e. the quantity `norm(A'*(b - A*x) - λ^2 * x)` that needs to be smaller
+        than the requested tolerance `tol` in order to have a converged solution
+      + `info.numops::Int`: total number of times that the linear map was applied, i.e. the
+        number of times that `f` was called, or a vector was multiplied with `A` or `A'`
+      + `info.numiter::Int`: total number of iterations of the algorithm
+
+### Algorithms
+
+The final (expert) method, without default values and keyword arguments, is the one that is
+finally called, and can also be used directly. Here, one specifies the algorithm
+explicitly. Currently, only [`LSMR`](@ref) is available and thus selected.
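+
+For example, a minimal usage sketch (the map and tolerance below are arbitrary
+illustrations; this particular map happens to be its own adjoint with respect to
+`real(inner(x, y))`, so it can serve as both entries of the tuple):
+
+```julia
+using KrylovKit
+b = randn(ComplexF64, 4)
+f = x -> 2 * x + conj(x)    # real linear; self-adjoint for the real inner product
+x, info = reallssolve((f, f), b, LSMR(; tol=1e-10))
+info.converged == 1 || @warn "reallssolve did not converge"
+```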
+""" +function reallssolve(f, b, alg, λ::Real=0) + x, info = lssolve(f, RealVec(b), alg, λ) + newinfo = ConvergenceInfo(info.converged, info.residual[], info.normres, info.numiter, + info.numops) + return x[], newinfo +end diff --git a/src/matrixfun/expintegrator.jl b/src/matrixfun/expintegrator.jl index 7c87c26c..618f9a39 100644 --- a/src/matrixfun/expintegrator.jl +++ b/src/matrixfun/expintegrator.jl @@ -17,12 +17,6 @@ linear map, i.e. a `AbstractMatrix` or just a general function or callable objec any eigenvalues with real part larger than zero, however, the solution to the ODE will diverge, i.e. the fixed point is not stable. -!!! warning - - The returned solution might be the solution of the ODE integrated up to a smaller time - ``t̃ = sign(t) * |t̃|`` with ``|t̃| < |t|``, when the required precision could not be - attained. Always check `info.converged > 0` or `info.residual == 0` (see below). - ### Arguments: The linear map `A` can be an `AbstractMatrix` (dense or sparse) or a general function or @@ -41,18 +35,16 @@ of any type and should be in the domain of `A`. The return value is always of the form `y, info = expintegrator(...)` with - `y`: the result of the computation, i.e. - ``y = ϕ₀(t̃*A)*u₀ + t̃*ϕ₁(t̃*A)*u₁ + t̃^2*ϕ₂(t̃*A)*u₂ + …`` - with ``t̃ = sign(t) * |t̃|`` with ``|t̃| <= |t|``, such that the accumulated error in - `y` per unit time is at most equal to the keyword argument `tol` + ``y = ϕ₀(t*A)*u₀ + t*ϕ₁(t*A)*u₁ + t^2*ϕ₂(t*A)*u₂ + …`` - `info`: an object of type [`ConvergenceInfo`], which has the following fields - + `info.converged::Int`: 0 or 1 if the solution `y` was evolved all the way up to the - requested time `t`. - + `info.residual`: there is no residual in the conventional sense, however, this - value equals the residual time `t - t̃`, i.e. it is zero if `info.converged == 1` + + `info.converged::Int`: 0 or 1 if the solution `y` at time `t` was found with an + error below the requested tolerance per unit time, i.e. if `info.normres <= tol * abs(t)` + + `info.residual::Nothing`: value `nothing`, there is no concept of a residual in + this case + `info.normres::Real`: a (rough) estimate of the total error accumulated in the - solution, should be smaller than `tol * |t̃|` + solution + `info.numops::Int`: number of times the linear map was applied, i.e. number of times `f` was called, or a vector was multiplied with `A` + `info.numiter::Int`: number of times the Krylov subspace was restarted (see below) @@ -61,8 +53,12 @@ The return value is always of the form `y, info = expintegrator(...)` with Keyword arguments and their default values are given by: - - `verbosity::Int = 0`: verbosity level, i.e. 0 (no messages), 1 (single message - at the end), 2 (information after every iteration), 3 (information per Krylov step) + - `verbosity::Int = 0`: verbosity level, i.e. + - 0 (suppress all messages) + - 1 (only warnings) + - 2 (one message with convergence info at the end) + - 3 (progress info after every iteration) + - 4+ (all of the above and additional information about the Lanczos or Arnoldi iteration) - `krylovdim = 30`: the maximum dimension of the Krylov subspace that will be constructed. Note that the dimension of the vector space is not known or checked, e.g. `x₀` should not necessarily support the `Base.length` function. 
If you know the actual problem @@ -116,7 +112,9 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) S = real(T) w₀ = scale(u₀, one(T)) - # krylovdim and related allocations + # maxiter, krylovdim and related allocations + maxiter = alg.maxiter + @assert maxiter >= 1 krylovdim = alg.krylovdim K = krylovdim HH = zeros(T, (krylovdim + p + 1, krylovdim + p + 1)) @@ -126,14 +124,23 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) totalerr = zero(η) sgn = sign(t) τ::S = abs(t) - τ₀::S = zero(τ) - Δτ::S = τ - τ₀ # don't try any clever initial guesses, rely on correction mechanism + if isfinite(τ) + Δτ = τ # don't try any clever initial guesses, rely on correction mechanism + Δτmin = τ / alg.maxiter + maxerr = τ * η + else + Δτ = oneunit(S) + Δτmin = zero(S) + maxerr = η + end + totaltimestring = @sprintf("%.2e", τ) # safety factors δ::S = 1.2 γ::S = 0.8 # initial vectors + τ₀ = zero(τ) w = Vector{typeof(w₀)}(undef, p + 1) w[1] = w₀ # reuse the result of apply computed earlier: @@ -151,11 +158,11 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) end v = zerovector(w₀) β = norm(w[p + 1]) - if β < alg.tol && p == 1 - if alg.verbosity > 0 - @info """expintegrate finished after 0 iterations, converged to fixed point up to error = $β""" + if β < η && p == 1 + if alg.verbosity >= STARTSTOP_LEVEL + @info "expintegrate finished after 0 iterations, converged to fixed point up to error = $(normres2string(β))" end - return w₀, ConvergenceInfo(1, zero(τ), β, 0, numops) + return w₀, ConvergenceInfo(1, nothing, β, 0, numops) end v = scale!!(v, w[p + 1], 1 / β) @@ -165,19 +172,25 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) else iter = ArnoldiIterator(A, w[p + 1], alg.orth) end - fact = initialize(iter; verbosity=alg.verbosity - 2) + fact = initialize(iter; verbosity=alg.verbosity) numops += 1 sizehint!(fact, krylovdim) # start outer iteration loop - maxiter = alg.maxiter numiter = 1 while true K = length(fact) V = basis(fact) if K == krylovdim - Δτ = min(Δτ, τ - τ₀) + if numiter < maxiter + Δτ = min(Δτ, τ - τ₀) + if isfinite(τ) # try to adapt minimal time step + Δτmin = (τ - τ₀) / (maxiter - numiter + 1) + end + else + Δτ = τ - τ₀ + end # Small matrix exponential and error estimation H = fill!(view(HH, 1:(K + p + 1), 1:(K + p + 1)), zero(T)) @@ -186,51 +199,45 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) for i in 1:p H[K + i, K + i + 1] = 1 end - expH = LinearAlgebra.exp!(H) + expH = exp(H) # LinearAlgebra.exp! is type unstable for SubArray instances ϵ = abs(Δτ^p * β * normres(fact) * expH[K, K + p + 1]) ω = ϵ / (Δτ * η) - q = K / 2 - while ω > one(ω) + q::S = K / 2 + while numiter < maxiter && ω >= one(ω) && Δτ > Δτmin ϵ_prev = ϵ Δτ_prev = Δτ - Δτ *= (γ / ω)^(1 / (q + 1)) + Δτ = max(Δτ * (γ / ω)^(1 / (q + 1)), Δτmin) H = fill!(view(HH, 1:(K + p + 1), 1:(K + p + 1)), zero(T)) mul!(view(H, 1:K, 1:K), rayleighquotient(fact), sgn * Δτ) H[1, K + 1] = 1 for i in 1:p H[K + i, K + i + 1] = 1 end - expH = LinearAlgebra.exp!(H) + expH = exp(H) # LinearAlgebra.exp! is type unstable for SubArray instances ϵ = abs(Δτ^p * β * normres(fact) * expH[K, K + p + 1]) ω = ϵ / (Δτ * η) q = max(zero(q), log(ϵ / ϵ_prev) / log(Δτ / Δτ_prev) - 1) end # take time step + τ₀ = numiter < maxiter ? 
τ₀ + Δτ : τ # to avoid floating point errors totalerr += ϵ jfac = 1 for j in 1:(p - 1) w₀ = add!!(w₀, w[j + 1], (sgn * Δτ)^j / jfac) jfac *= (j + 1) end - w[p + 1] = mul!(w[p + 1], basis(fact), view(expH, 1:K, K + p)) + w[p + 1] = unproject!!(w[p + 1], basis(fact), view(expH, 1:K, K + p)) # add first correction w[p + 1] = add!!(w[p + 1], residual(fact), expH[K, K + p + 1]) w₀ = add!!(w₀, w[p + 1], β * (sgn * Δτ)^p) - τ₀ += Δτ + w[1] = w₀ # increase time step for next iteration: if ω < γ Δτ *= (γ / ω)^(1 / (q + 1)) end - - if alg.verbosity > 1 - msg = "expintegrate in iteration $numiter: " - msg *= "reached time " * @sprintf("%.2e", τ₀) - msg *= ", total error = " * @sprintf("%.4e", totalerr) - @info msg - end elseif normres(fact) <= ((τ - τ₀) * η) || alg.eager # Small matrix exponential and error estimation H = fill!(view(HH, 1:(K + p + 1), 1:(K + p + 1)), zero(T)) @@ -239,7 +246,7 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) for i in 1:p H[K + i, K + i + 1] = 1 end - expH = LinearAlgebra.exp!(H) + expH = exp(H) # LinearAlgebra.exp! is type unstable for SubArray instances ϵ = abs((τ - τ₀)^p * β * normres(fact) * expH[K, K + p + 1]) ω = ϵ / ((τ - τ₀) * η) if ω < one(ω) @@ -250,57 +257,66 @@ function expintegrator(A, t::Number, u::Tuple, alg::Union{Lanczos,Arnoldi}) w₀ = add!!(w₀, w[j + 1], (sgn * (τ - τ₀))^j / jfac) jfac *= (j + 1) end - w[p + 1] = mul!(w[p + 1], basis(fact), view(expH, 1:K, K + p)) + w[p + 1] = unproject!!(w[p + 1], basis(fact), view(expH, 1:K, K + p)) # add first correction w[p + 1] = add!!(w[p + 1], residual(fact), expH[K, K + p + 1]) w₀ = add!!(w₀, w[p + 1], β * (sgn * (τ - τ₀))^p) + w[1] = w₀ τ₀ = τ end end if τ₀ >= τ - if alg.verbosity > 0 - @info """expintegrate finished after $numiter iterations: total error = $totalerr""" + if totalerr <= maxerr + if alg.verbosity >= STARTSTOP_LEVEL + @info """expintegrate finished after $numiter iterations: + * total error = $(normres2string(totalerr)) + * number of operations = $numops""" + end + return w₀, ConvergenceInfo(1, nothing, totalerr, numiter, numops) + else + if alg.verbosity >= WARN_LEVEL + @warn """expintegrate did not reach sufficiently small error after $numiter iterations: + * total error = $(normres2string(totalerr)) + * number of operations = $numops""" + end + return w₀, ConvergenceInfo(0, nothing, totalerr, numiter, numops) end - return w₀, ConvergenceInfo(1, zero(τ), totalerr, numiter, numops) end if K < krylovdim - fact = expand!(iter, fact; verbosity=alg.verbosity - 2) + fact = expand!(iter, fact; verbosity=alg.verbosity) numops += 1 else - if numiter == maxiter - if alg.verbosity > 0 - @warn """expintegrate finished without convergence after $numiter iterations: - total error = $totalerr, residual time = $(τ - τ₀)""" - end - return w₀, ConvergenceInfo(0, τ - τ₀, totalerr, numiter, numops) - else # reinitialize - for j in 1:p - w[j + 1] = apply(A, w[j]) - numops += 1 - lfac = 1 - for l in 0:(p - j) - w[j + 1] = add!!(w[j + 1], u[j + l + 1], (sgn * τ₀)^l / lfac) - lfac *= l + 1 - end + for j in 1:p + w[j + 1] = apply(A, w[j]) + numops += 1 + lfac = 1 + for l in 0:(p - j) + w[j + 1] = add!!(w[j + 1], u[j + l + 1], (sgn * τ₀)^l / lfac) + lfac *= l + 1 end - β = norm(w[p + 1]) - if β < alg.tol && p == 1 # w₀ is fixed point of ODE - if alg.verbosity > 0 - @info """expintegrate finished after $numiter iterations, converged to fixed point up to error = $β""" - end - return w₀, ConvergenceInfo(1, zero(τ), β, numiter, numops) + end + β = norm(w[p + 1]) + if β < η && p == 1 # w₀ is 
fixed point of ODE + if alg.verbosity >= STARTSTOP_LEVEL + @info "expintegrate finished after $numiter iterations, converged to fixed point up to error = $(normres2string(totalerr))" end - v = scale!!(v, w[p + 1], 1 / β) + return w₀, ConvergenceInfo(1, nothing, β, numiter, numops) + end + v = scale!!(v, w[p + 1], 1 / β) - if alg isa Lanczos - iter = LanczosIterator(A, w[p + 1], alg.orth) - else - iter = ArnoldiIterator(A, w[p + 1], alg.orth) - end - fact = initialize!(iter, fact; verbosity=alg.verbosity - 2) - numops += 1 - numiter += 1 + if alg.verbosity >= EACHITERATION_LEVEL + timestring = @sprintf("%.2e", τ₀) + @info "expintegrate in iteration $numiter: reached time $timestring of $totaltimestring, total error = $(normres2string(totalerr))" end + + if alg isa Lanczos + iter = LanczosIterator(A, w[p + 1], alg.orth) + else + iter = ArnoldiIterator(A, w[p + 1], alg.orth) + end + fact = initialize!(iter, fact; verbosity=alg.verbosity) + numops += 1 + numiter += 1 end end end diff --git a/src/matrixfun/exponentiate.jl b/src/matrixfun/exponentiate.jl index 8f4d63c4..9de0d2b9 100644 --- a/src/matrixfun/exponentiate.jl +++ b/src/matrixfun/exponentiate.jl @@ -27,12 +27,12 @@ The return value is always of the form `y, info = exponentiate(...)` with - `info`: an object of type [`ConvergenceInfo`], which has the following fields - + `info.converged::Int`: 0 or 1 if the solution `y` was approximated up to the - requested tolerance `tol`. + + `info.converged::Int`: 0 or 1 if the solution `y` at time `t` was found with an + error below the requested tolerance per unit time, i.e. if `info.normres <= tol * abs(t)` + `info.residual::Nothing`: value `nothing`, there is no concept of a residual in this case - + `info.normres::Real`: a (rough) estimate of the error between the approximate and - exact solution + + `info.normres::Real`: a (rough) estimate of the total error accumulated in the + solution + `info.numops::Int`: number of times the linear map was applied, i.e. number of times `f` was called, or a vector was multiplied with `A` + `info.numiter::Int`: number of times the Krylov subspace was restarted (see below) @@ -46,8 +46,12 @@ The return value is always of the form `y, info = exponentiate(...)` with Keyword arguments and their default values are given by: - - `verbosity::Int = 0`: verbosity level, i.e. 0 (no messages), 1 (single message - at the end), 2 (information after every iteration), 3 (information per Krylov step) + - `verbosity::Int = 0`: verbosity level, i.e. + - 0 (suppress all messages) + - 1 (only warnings) + - 2 (one message with convergence info at the end) + - 3 (progress info after every iteration) + - 4+ (all of the above and additional information about the Lanczos or Arnoldi iteration) - `krylovdim = 30`: the maximum dimension of the Krylov subspace that will be constructed. Note that the dimension of the vector space is not known or checked, e.g. `x₀` should not necessarily support the `Base.length` function. If you know the actual problem diff --git a/src/orthonormal.jl b/src/orthonormal.jl index 5c3ec973..a0970f60 100644 --- a/src/orthonormal.jl +++ b/src/orthonormal.jl @@ -16,8 +16,8 @@ One can easily orthogonalize or orthonormalize a given vector `v` with respect t [`w, = orthogonalize(v,b,...)`](@ref orthogonalize) or [`w, = orthonormalize(v,b,...)`](@ref orthonormalize). The resulting vector `w` of the latter can then be added to `b` using `push!(b, w)`. 
Note that in place versions -[`orthogonalize!(v, b, ...)`](@ref orthogonalize) or -[`orthonormalize!(v, b, ...)`](@ref orthonormalize) are also available. +[`orthogonalize!!(v, b, ...)`](@ref orthogonalize) or +[`orthonormalize!!(v, b, ...)`](@ref orthonormalize) are also available. Finally, a linear combination of the vectors in `b::OrthonormalBasis` can be obtained by multiplying `b` with a `Vector{<:Number}` using `*` or `mul!` (if the output vector is @@ -56,12 +56,22 @@ Base.resize!(b::OrthonormalBasis, k::Int) = (resize!(b.basis, k); return b) # Multiplication methods with OrthonormalBasis function Base.:*(b::OrthonormalBasis, x::AbstractVector) y = zerovector(first(b), promote_type(scalartype(x), scalartype(first(b)))) - return mul!(y, b, x) + return unproject!!(y, b, x) end -LinearAlgebra.mul!(y, b::OrthonormalBasis, x::AbstractVector) = unproject!!(y, b, x, 1, 0) const BLOCKSIZE = 4096 +# helper function to determine if a multithreaded approach should be used +# this uses functionality beyond VectorInterface, but can be faster +_use_multithreaded_array_kernel(y) = _use_multithreaded_array_kernel(typeof(y)) +_use_multithreaded_array_kernel(::Type) = false +function _use_multithreaded_array_kernel(::Type{<:Array{T}}) where {T<:Number} + return isbitstype(T) && get_num_threads() > 1 +end +function _use_multithreaded_array_kernel(::Type{<:OrthonormalBasis{T}}) where {T} + return _use_multithreaded_array_kernel(T) +end + """ project!!(y::AbstractVector, b::OrthonormalBasis, x, [α::Number = 1, β::Number = 0, r = Base.OneTo(length(b))]) @@ -127,8 +137,7 @@ function unproject!!(y, α::Number=true, β::Number=false, r=Base.OneTo(length(b))) - if y isa AbstractArray && !(y isa AbstractGPUArray) && IndexStyle(y) isa IndexLinear && - get_num_threads() > 1 + if _use_multithreaded_array_kernel(y) return unproject_linear_multithreaded!(y, b, x, α, β, r) end # general case: using only vector operations, i.e. axpy! (similar to BLAS level 1) @@ -157,7 +166,7 @@ function unproject_linear_multithreaded!(y::AbstractArray, length(b[rj]) == m || throw(DimensionMismatch()) end if n == 0 - return β == 1 ? y : β == 0 ? fill!(y, 0) : rmul!(y, β) + return β == 1 ? y : β == 0 ? zerovector!(y) : scale!(y, β) end let m = m, n = n, y = y, x = x, b = b, blocksize = prevpow(2, div(BLOCKSIZE, n)) @sync for II in splitrange(1:blocksize:m, get_num_threads()) @@ -213,8 +222,7 @@ It is the user's responsibility to make sure that the result is still an orthono α::Number=true, β::Number=true, r=Base.OneTo(length(b))) - if y isa AbstractArray && !(y isa AbstractGPUArray) && IndexStyle(y) isa IndexLinear && - Threads.nthreads() > 1 + if _use_multithreaded_array_kernel(y) return rank1update_linear_multithreaded!(b, y, x, α, β, r) end # general case: using only vector operations, i.e. axpy! (similar to BLAS level 1) @@ -294,8 +302,7 @@ and are stored in `b`, so the old basis vectors are thrown away. Note that, by d the subspace spanned by these basis vectors is exactly the same. """ function basistransform!(b::OrthonormalBasis{T}, U::AbstractMatrix) where {T} # U should be unitary or isometric - if T <: AbstractArray && !(T <: AbstractGPUArray) && IndexStyle(T) isa IndexLinear && - get_num_threads() > 1 + if _use_multithreaded_array_kernel(b) return basistransform_linear_multithreaded!(b, U) end m, n = size(U) @@ -372,10 +379,10 @@ end # end # Orthogonalization of a vector against a given OrthonormalBasis -orthogonalize(v, args...) = orthogonalize!(true * v, args...) +orthogonalize(v, args...) 
= orthogonalize!!(scale(v, true), args...) function orthogonalize!!(v::T, b::OrthonormalBasis{T}, alg::Orthogonalizer) where {T} - S = promote_type(eltype(v), eltype(T)) + S = promote_type(scalartype(v), scalartype(T)) c = Vector{S}(undef, length(b)) return orthogonalize!!(v, b, c, alg) end diff --git a/src/recursivevec.jl b/src/recursivevec.jl deleted file mode 100644 index cc7379cc..00000000 --- a/src/recursivevec.jl +++ /dev/null @@ -1,117 +0,0 @@ -""" - v = RecursiveVec(vecs) - -Create a new vector `v` from an existing (homogeneous or heterogeneous) list of vectors -`vecs` with one or more elements, represented as a `Tuple` or `AbstractVector`. The elements -of `vecs` can be any type of vectors that are supported by KrylovKit. For a heterogeneous -list, it is best to use a tuple for reasons of type stability, while for a homogeneous list, -either a `Tuple` or a `Vector` can be used. From a mathematical perspectve, `v` represents -the direct sum of the vectors in `vecs`. Scalar multiplication and addition of vectors `v` -acts simultaneously on all elements of `v.vecs`. The inner product corresponds to the sum -of the inner products of the individual vectors in the list `v.vecs`. - -The vector `v` also adheres to the iteration syntax, but where it will just produce the -individual vectors in `v.vecs`. Hence, `length(v) = length(v.vecs)`. It can also be indexed, -so that `v[i] = v.vecs[i]`, which can be useful in writing a linear map that acts on `v`. -""" -struct RecursiveVec{T<:Union{Tuple,AbstractVector}} - vecs::T -end -function RecursiveVec(arg1::AbstractVector{T}) where {T} - if isbitstype(T) - return RecursiveVec((arg1,)) - else - return RecursiveVec{typeof(arg1)}(arg1) - end -end -RecursiveVec(arg1, args...) = RecursiveVec((arg1, args...)) - -Base.getindex(v::RecursiveVec, i) = v.vecs[i] - -Base.iterate(v::RecursiveVec, args...) = iterate(v.vecs, args...) 
- -Base.IteratorEltype(::Type{RecursiveVec{T}}) where {T} = Base.IteratorEltype(T) -Base.IteratorSize(::Type{RecursiveVec{T}}) where {T} = Base.IteratorSize(T) - -Base.eltype(v::RecursiveVec) = eltype(v.vecs) -Base.size(v::RecursiveVec) = size(v.vecs) -Base.length(v::RecursiveVec) = length(v.vecs) - -Base.first(v::RecursiveVec) = first(v.vecs) -Base.last(v::RecursiveVec) = last(v.vecs) - -Base.:-(v::RecursiveVec) = RecursiveVec(map(-, v.vecs)) -Base.:+(v::RecursiveVec, w::RecursiveVec) = RecursiveVec(map(+, v.vecs, w.vecs)) -Base.:-(v::RecursiveVec, w::RecursiveVec) = RecursiveVec(map(-, v.vecs, w.vecs)) -Base.:*(v::RecursiveVec, a::Number) = RecursiveVec(map(x -> x * a, v.vecs)) -Base.:*(a::Number, v::RecursiveVec) = RecursiveVec(map(x -> a * x, v.vecs)) -Base.:/(v::RecursiveVec, a::Number) = RecursiveVec(map(x -> x / a, v.vecs)) -Base.:\(a::Number, v::RecursiveVec) = RecursiveVec(map(x -> a \ x, v.vecs)) - -function Base.similar(v::RecursiveVec) - return RecursiveVec(similar.(v.vecs)) -end - -function Base.copy!(w::RecursiveVec, v::RecursiveVec) - @assert length(w) == length(v) - @inbounds for i in 1:length(w) - copyto!(w[i], v[i]) - end - return w -end - -function LinearAlgebra.dot(v::RecursiveVec{T}, w::RecursiveVec{T}) where {T} - return sum(dot.(v.vecs, w.vecs)) -end - -VectorInterface.scalartype(::Type{RecursiveVec{T}}) where {T} = scalartype(T) - -function VectorInterface.zerovector(v::RecursiveVec, T::Type{<:Number}) - return RecursiveVec(zerovector(v.vecs, T)) -end - -function VectorInterface.scale(v::RecursiveVec, a::Number) - return RecursiveVec(scale(v.vecs, a)) -end - -function VectorInterface.scale!(v::RecursiveVec, a::Number) - scale!(v.vecs, a) - return v -end - -function VectorInterface.scale!(w::RecursiveVec, v::RecursiveVec, a::Number) - scale!(w.vecs, v.vecs, a) - return w -end - -function VectorInterface.scale!!(x::RecursiveVec, a::Number) - return RecursiveVec(scale!!(x.vecs, a)) -end - -function VectorInterface.scale!!(w::RecursiveVec, - v::RecursiveVec, a::Number) - return RecursiveVec(scale!!(w.vecs, v.vecs, a)) -end - -function VectorInterface.add(w::RecursiveVec, v::RecursiveVec, a::Number=One(), - b::Number=One()) - return RecursiveVec(add(w.vecs, v.vecs, a, b)) -end - -function VectorInterface.add!(w::RecursiveVec, v::RecursiveVec, a::Number=One(), - b::Number=One()) - add!(w.vecs, v.vecs, a, b) - return w -end - -function VectorInterface.add!!(w::RecursiveVec, v::RecursiveVec, - a::Number=One(), - b::Number=One()) - return RecursiveVec(add!!(w.vecs, v.vecs, a, b)) -end - -function VectorInterface.inner(v::RecursiveVec{T}, w::RecursiveVec{T}) where {T} - return inner(v.vecs, w.vecs) -end - -VectorInterface.norm(v::RecursiveVec) = VectorInterface.norm(v.vecs) diff --git a/test/ad.jl b/test/ad.jl deleted file mode 100644 index 11de1b96..00000000 --- a/test/ad.jl +++ /dev/null @@ -1,97 +0,0 @@ -module LinsolveAD -using KrylovKit, LinearAlgebra -using Random, Test -using ChainRulesCore, ChainRulesTestUtils, Zygote, FiniteDifferences - -fdm = ChainRulesTestUtils._fdm -tolerance(T::Type{<:Number}) = eps(real(T))^(2 / 3) -n = 10 -N = 30 - -function build_mat_example(A, b; tol=tolerance(eltype(A)), kwargs...) 
- Avec, A_fromvec = to_vec(A) - bvec, b_fromvec = to_vec(b) - T = eltype(A) - - function mat_example(Av, bv) - A′ = A_fromvec(Av) - b′ = b_fromvec(bv) - x, info = linsolve(A′, b′, zero(b′), GMRES(; tol=tol, kwargs...)) - info.converged == 0 && @warn "linsolve did not converge" - xv, = to_vec(x) - return xv - end - return mat_example, Avec, bvec -end - -function build_fun_example(A, b, c, d, e, f; tol=tolerance(eltype(A)), kwargs...) - Avec, matfromvec = to_vec(A) - bvec, vecfromvec = to_vec(b) - cvec, = to_vec(c) - dvec, = to_vec(d) - evec, scalarfromvec = to_vec(e) - fvec, = to_vec(f) - - function fun_example(Av, bv, cv, dv, ev, fv) - A′ = matfromvec(Av) - b′ = vecfromvec(bv) - c′ = vecfromvec(cv) - d′ = vecfromvec(dv) - e′ = scalarfromvec(ev) - f′ = scalarfromvec(fv) - - x, info = linsolve(b′, zero(b′), GMRES(; tol=tol, kwargs...), e′, f′) do y - return A′ * y + c′ * dot(d′, y) - end - # info.converged > 0 || @warn "not converged" - xv, = to_vec(x) - return xv - end - return fun_example, Avec, bvec, cvec, dvec, evec, fvec -end - -@testset "Small linsolve AD test" begin - @testset for T in (Float32, Float64, ComplexF32, ComplexF64) - A = 2 * (rand(T, (n, n)) .- one(T) / 2) - b = 2 * (rand(T, n) .- one(T) / 2) - b /= norm(b) - - mat_example, Avec, bvec = build_mat_example(A, b; tol=cond(A) * eps(real(T)), - krylovdim=n, maxiter=1) - - (JA, Jb) = FiniteDifferences.jacobian(fdm, mat_example, Avec, bvec) - (JA′, Jb′) = Zygote.jacobian(mat_example, Avec, bvec) - @test JA ≈ JA′ rtol = cond(A) * tolerance(T) - @test Jb ≈ Jb′ rtol = cond(A) * tolerance(T) - end -end - -@testset "Large linsolve AD test" begin - for T in (Float64, ComplexF64) - A = rand(T, (N, N)) .- one(T) / 2 - A = I - (9 // 10) * A / maximum(abs, eigvals(A)) - b = 2 * (rand(T, N) .- one(T) / 2) - c = 2 * (rand(T, N) .- one(T) / 2) - d = 2 * (rand(T, N) .- one(T) / 2) - e = rand(T) - f = rand(T) - - fun_example, Avec, bvec, cvec, dvec, evec, fvec = build_fun_example(A, b, c, d, e, - f; - tol=tolerance(T), - krylovdim=20) - - (JA, Jb, Jc, Jd, Je, Jf) = FiniteDifferences.jacobian(fdm, fun_example, - Avec, bvec, cvec, dvec, evec, - fvec) - (JA′, Jb′, Jc′, Jd′, Je′, Jf′) = Zygote.jacobian(fun_example, Avec, bvec, cvec, - dvec, evec, fvec) - @test JA ≈ JA′ - @test Jb ≈ Jb′ - @test Jc ≈ Jc′ - @test Jd ≈ Jd′ - @test Je ≈ Je′ - @test Jf ≈ Jf′ - end -end -end diff --git a/test/ad/degenerateeigsolve.jl b/test/ad/degenerateeigsolve.jl new file mode 100644 index 00000000..f3b98865 --- /dev/null +++ b/test/ad/degenerateeigsolve.jl @@ -0,0 +1,164 @@ +module DegenerateEigsolveAD + +using KrylovKit, LinearAlgebra +using Random, Test, TestExtras +using ChainRulesCore, ChainRulesTestUtils, Zygote, FiniteDifferences +using ..TestSetup +Random.seed!(987654321) + +fdm = ChainRulesTestUtils._fdm +n = 10 +N = 30 + +function build_mat_example(A, B, C, x, alg, alg_rrule) + howmany = 1 + which = :LM + + Avec, A_fromvec = to_vec(A) + Bvec, B_fromvec = to_vec(B) + Cvec, C_fromvec = to_vec(C) + xvec, x_fromvec = to_vec(x) + + M = [zero(A) zero(A) C; A zero(A) zero(A); zero(A) B zero(A)] + vals, vecs, info = eigsolve(M, x, howmany, which, alg) + info.converged < howmany && @warn "eigsolve did not converge" + + function mat_example(Av, Bv, Cv, xv) + à = A_fromvec(Av) + B̃ = B_fromvec(Bv) + C̃ = C_fromvec(Cv) + x̃ = x_fromvec(xv) + M̃ = [zero(Ã) zero(Ã) C̃; à zero(Ã) zero(Ã); zero(Ã) B̃ zero(Ã)] + vals′, vecs′, info′ = eigsolve(M̃, x̃, howmany, which, alg; alg_rrule=alg_rrule) + info′.converged < howmany && @warn "eigsolve did not converge" + catresults = 
vcat(vals′[1:howmany], vecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + + function mat_example_fun(Av, Bv, Cv, xv) + à = A_fromvec(Av) + B̃ = B_fromvec(Bv) + C̃ = C_fromvec(Cv) + x̃ = x_fromvec(xv) + M̃ = [zero(Ã) zero(Ã) C̃; à zero(Ã) zero(Ã); zero(Ã) B̃ zero(Ã)] + f = x -> M̃ * x + vals′, vecs′, info′ = eigsolve(f, x̃, howmany, which, alg; alg_rrule=alg_rrule) + info′.converged < howmany && @warn "eigsolve did not converge" + catresults = vcat(vals′[1:howmany], vecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + + function mat_example_fd(Av, Bv, Cv, xv) + à = A_fromvec(Av) + B̃ = B_fromvec(Bv) + C̃ = C_fromvec(Cv) + x̃ = x_fromvec(xv) + M̃ = [zero(Ã) zero(Ã) C̃; à zero(Ã) zero(Ã); zero(Ã) B̃ zero(Ã)] + howmany′ = (eltype(Av) <: Complex ? 3 : 6) * howmany + vals′, vecs′, info′ = eigsolve(M̃, x̃, howmany′, which, alg; alg_rrule=alg_rrule) + _, i = findmin(abs.(vals′ .- vals[1])) + info′.converged < i && @warn "eigsolve did not converge" + d = dot(vecs[1], vecs′[i]) + @assert abs(d) > sqrt(eps(real(eltype(A)))) + phasefix = abs(d) / d + vecs′[i] = vecs′[i] * phasefix + catresults = vcat(vals′[i:i], vecs′[i:i]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + + return mat_example, mat_example_fun, mat_example_fd, Avec, Bvec, Cvec, xvec, vals, + vecs +end + +@timedtestset "Degenerate eigsolve AD test with eltype=$T" for T in (Float64, ComplexF64) + n = 10 + N = 3n + + A = randn(T, n, n) + B = randn(T, n, n) + C = randn(T, n, n) + + M = [zeros(T, n, 2n) A; B zeros(T, n, 2n); zeros(T, n, n) C zeros(T, n, n)] + x = randn(T, N) + + tol = tolerance(T) #2 * N^2 * eps(real(T)) + alg = Arnoldi(; tol=tol, krylovdim=2n) + alg_rrule1 = Arnoldi(; tol=tol, krylovdim=2n, verbosity=0) + alg_rrule2 = GMRES(; tol=tol, krylovdim=2n, verbosity=0) + #! format: off + mat_example1, mat_example_fun1, mat_example_fd, Avec, Bvec, Cvec, xvec, vals, vecs = + build_mat_example(A, B, C, x, alg, alg_rrule1) + mat_example2, mat_example_fun2, mat_example_fd, Avec, Bvec, Cvec, xvec, vals, vecs = + build_mat_example(A, B, C, x, alg, alg_rrule2) + #! 
format: on + (JA, JB, JC, Jx) = FiniteDifferences.jacobian(fdm, mat_example_fd, Avec, Bvec, + Cvec, xvec) + (JA1, JB1, JC1, Jx1) = Zygote.jacobian(mat_example1, Avec, Bvec, Cvec, xvec) + (JA2, JB2, JC2, Jx2) = Zygote.jacobian(mat_example_fun1, Avec, Bvec, Cvec, xvec) + (JA3, JB3, JC3, Jx3) = Zygote.jacobian(mat_example2, Avec, Bvec, Cvec, xvec) + (JA4, JB4, JC4, Jx4) = Zygote.jacobian(mat_example_fun2, Avec, Bvec, Cvec, xvec) + + @test isapprox(JA, JA1; rtol=N * sqrt(eps(real(T)))) + @test isapprox(JB, JB1; rtol=N * sqrt(eps(real(T)))) + @test isapprox(JC, JC1; rtol=N * sqrt(eps(real(T)))) + + @test all(isapprox.(JA1, JA2; atol=n * eps(real(T)))) + @test all(isapprox.(JB1, JB2; atol=n * eps(real(T)))) + @test all(isapprox.(JC1, JC2; atol=n * eps(real(T)))) + + @test all(isapprox.(JA1, JA3; atol=tol)) + @test all(isapprox.(JB1, JB3; atol=tol)) + @test all(isapprox.(JC1, JC3; atol=tol)) + + @test all(isapprox.(JA1, JA4; atol=tol)) + @test all(isapprox.(JB1, JB4; atol=tol)) + @test all(isapprox.(JC1, JC4; atol=tol)) + + @test norm(Jx, Inf) < N * sqrt(eps(real(T))) + @test all(iszero, Jx1) + @test all(iszero, Jx2) + @test all(iszero, Jx3) + @test all(iszero, Jx4) + + # some analysis + ∂valsA = complex.(JA1[1, :], JA1[N + 2, :]) + ∂valsB = complex.(JB1[1, :], JB1[N + 2, :]) + ∂valsC = complex.(JC1[1, :], JC1[N + 2, :]) + ∂vecsA = complex.(JA1[1 .+ (1:N), :], JA1[N + 2 .+ (1:N), :]) + ∂vecsB = complex.(JB1[1 .+ (1:N), :], JB1[N + 2 .+ (1:N), :]) + ∂vecsC = complex.(JC1[1 .+ (1:N), :], JC1[N + 2 .+ (1:N), :]) + if T <: Complex # test holomorphicity / Cauchy-Riemann equations + # for eigenvalues + @test real(∂valsA[1:2:(2n^2)]) ≈ +imag(∂valsA[2:2:(2n^2)]) + @test imag(∂valsA[1:2:(2n^2)]) ≈ -real(∂valsA[2:2:(2n^2)]) + @test real(∂valsB[1:2:(2n^2)]) ≈ +imag(∂valsB[2:2:(2n^2)]) + @test imag(∂valsB[1:2:(2n^2)]) ≈ -real(∂valsB[2:2:(2n^2)]) + @test real(∂valsC[1:2:(2n^2)]) ≈ +imag(∂valsC[2:2:(2n^2)]) + @test imag(∂valsC[1:2:(2n^2)]) ≈ -real(∂valsC[2:2:(2n^2)]) + # and for eigenvectors + @test real(∂vecsA[:, 1:2:(2n^2)]) ≈ +imag(∂vecsA[:, 2:2:(2n^2)]) + @test imag(∂vecsA[:, 1:2:(2n^2)]) ≈ -real(∂vecsA[:, 2:2:(2n^2)]) + @test real(∂vecsB[:, 1:2:(2n^2)]) ≈ +imag(∂vecsB[:, 2:2:(2n^2)]) + @test imag(∂vecsB[:, 1:2:(2n^2)]) ≈ -real(∂vecsB[:, 2:2:(2n^2)]) + @test real(∂vecsC[:, 1:2:(2n^2)]) ≈ +imag(∂vecsC[:, 2:2:(2n^2)]) + @test imag(∂vecsC[:, 1:2:(2n^2)]) ≈ -real(∂vecsC[:, 2:2:(2n^2)]) + end + # test orthogonality of vecs and ∂vecs + @test all(isapprox.(abs.(vecs[1]' * ∂vecsA), 0; atol=sqrt(eps(real(T))))) + @test all(isapprox.(abs.(vecs[1]' * ∂vecsB), 0; atol=sqrt(eps(real(T))))) + @test all(isapprox.(abs.(vecs[1]' * ∂vecsC), 0; atol=sqrt(eps(real(T))))) +end + +end diff --git a/test/ad/eigsolve.jl b/test/ad/eigsolve.jl new file mode 100644 index 00000000..656226b7 --- /dev/null +++ b/test/ad/eigsolve.jl @@ -0,0 +1,406 @@ +module EigsolveAD +using KrylovKit, LinearAlgebra +using Random, Test, TestExtras +using ChainRulesCore, ChainRulesTestUtils, Zygote, FiniteDifferences +using ..TestSetup +Random.seed!(987654321) + +fdm = ChainRulesTestUtils._fdm +n = 10 +N = 30 + +function build_mat_example(A, x, howmany::Int, which, alg, alg_rrule) + Avec, A_fromvec = to_vec(A) + xvec, x_fromvec = to_vec(x) + + vals, vecs, info = eigsolve(A, x, howmany, which, alg) + info.converged < howmany && @warn "eigsolve did not converge" + if eltype(A) <: Real && length(vals) > howmany && + vals[howmany] == conj(vals[howmany + 1]) + howmany += 1 + end + + function mat_example(Av, xv) + à = A_fromvec(Av) + x̃ = x_fromvec(xv) + 
vals′, vecs′, info′ = eigsolve(Ã, x̃, howmany, which, alg; alg_rrule=alg_rrule) + info′.converged < howmany && @warn "eigsolve did not converge" + catresults = vcat(vals′[1:howmany], vecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + + function mat_example_fun(Av, xv) + à = A_fromvec(Av) + x̃ = x_fromvec(xv) + f = x -> à * x + vals′, vecs′, info′ = eigsolve(f, x̃, howmany, which, alg; alg_rrule=alg_rrule) + info′.converged < howmany && @warn "eigsolve did not converge" + catresults = vcat(vals′[1:howmany], vecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + + function mat_example_fd(Av, xv) + à = A_fromvec(Av) + x̃ = x_fromvec(xv) + vals′, vecs′, info′ = eigsolve(Ã, x̃, howmany, which, alg; alg_rrule=alg_rrule) + info′.converged < howmany && @warn "eigsolve did not converge" + for i in 1:howmany + d = dot(vecs[i], vecs′[i]) + @assert abs(d) > sqrt(eps(real(eltype(A)))) + phasefix = abs(d) / d + vecs′[i] = vecs′[i] * phasefix + end + catresults = vcat(vals′[1:howmany], vecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + + return mat_example, mat_example_fun, mat_example_fd, Avec, xvec, vals, vecs, howmany +end + +function testfun(A, x, c, d) + return A * x + c * dot(d, x) +end +testfunthunk(A, x, c, d) = testfun(A, x, c, d) +function ChainRulesCore.rrule(config::RuleConfig{>:HasReverseMode}, ::typeof(testfunthunk), + args...) + y = testfunthunk(args...) + function thunkedpb(dy) + pb = rrule_via_ad(config, testfun, args...)[2] + return map(z -> @thunk(z), pb(dy)) + end + return y, thunkedpb +end + +function build_fun_example(A, x, c, d, howmany::Int, which, alg, alg_rrule) + Avec, matfromvec = to_vec(A) + xvec, vecfromvec = to_vec(x) + cvec, = to_vec(c) + dvec, = to_vec(d) + + vals, vecs, info = eigsolve(x, howmany, which, alg) do y + return testfunthunk(A, y, c, d) + end + info.converged < howmany && @warn "eigsolve did not converge" + if eltype(A) <: Real && length(vals) > howmany && + vals[howmany] == conj(vals[howmany + 1]) + howmany += 1 + end + + fun_example_ad = let howmany′ = howmany + function (Av, xv, cv, dv) + à = matfromvec(Av) + x̃ = vecfromvec(xv) + c̃ = vecfromvec(cv) + d̃ = vecfromvec(dv) + + vals′, vecs′, info′ = eigsolve(x̃, howmany′, which, alg; + alg_rrule=alg_rrule) do y + return testfunthunk(Ã, y, c̃, d̃) + end + info′.converged < howmany′ && @warn "eigsolve did not converge" + catresults = vcat(vals′[1:howmany′], vecs′[1:howmany′]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + end + + fun_example_fd = let howmany′ = howmany + function (Av, xv, cv, dv) + à = matfromvec(Av) + x̃ = vecfromvec(xv) + c̃ = vecfromvec(cv) + d̃ = vecfromvec(dv) + + vals′, vecs′, info′ = eigsolve(x̃, howmany′, which, alg; + alg_rrule=alg_rrule) do y + return à * y + c̃ * dot(d̃, y) + end + info′.converged < howmany′ && @warn "eigsolve did not converge" + for i in 1:howmany′ + d = dot(vecs[i], vecs′[i]) + @assert abs(d) > sqrt(eps(real(eltype(A)))) + phasefix = abs(d) / d + vecs′[i] = vecs′[i] * phasefix + end + catresults = vcat(vals′[1:howmany′], vecs′[1:howmany′]...) 
+ if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + end + + return fun_example_ad, fun_example_fd, Avec, xvec, cvec, dvec, vals, vecs, howmany +end + +function build_hermitianfun_example(A, x, c, howmany::Int, which, alg, alg_rrule) + Avec, matfromvec = to_vec(A) + xvec, xvecfromvec = to_vec(x) + cvec, cvecfromvec = to_vec(c) + + vals, vecs, info = eigsolve(x, howmany, which, alg) do y + return testfunthunk(Hermitian(A), y, c, c) + end + info.converged < howmany && @warn "eigsolve did not converge" + + function fun_example(Av, xv, cv) + à = matfromvec(Av) + x̃ = xvecfromvec(xv) + c̃ = cvecfromvec(cv) + + vals′, vecs′, info′ = eigsolve(x̃, howmany, which, alg; + alg_rrule=alg_rrule) do y + return testfunthunk(Hermitian(Ã), y, c̃, c̃) + end + info′.converged < howmany && @warn "eigsolve did not converge" + catresults = vcat(vals′[1:howmany], vecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + + function fun_example_fd(Av, xv, cv) + à = matfromvec(Av) + x̃ = xvecfromvec(xv) + c̃ = cvecfromvec(cv) + + vals′, vecs′, info′ = eigsolve(x̃, howmany, which, alg; + alg_rrule=alg_rrule) do y + return Hermitian(Ã) * y + c̃ * dot(c̃, y) + end + info′.converged < howmany && @warn "eigsolve did not converge" + for i in 1:howmany + d = dot(vecs[i], vecs′[i]) + @assert abs(d) > sqrt(eps(real(eltype(A)))) + phasefix = abs(d) / d + vecs′[i] = vecs′[i] * phasefix + end + catresults = vcat(vals′[1:howmany], vecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + + return fun_example, fun_example_fd, Avec, xvec, cvec, vals, vecs, howmany +end + +@timedtestset "Small eigsolve AD test for eltype=$T" for T in + (Float32, Float64, ComplexF32, + ComplexF64) + if T <: Complex + whichlist = (:LM, :SR, :LR, :SI, :LI) + else + whichlist = (:LM, :SR, :LR) + end + A = 2 * (rand(T, (n, n)) .- one(T) / 2) + x = 2 * (rand(T, n) .- one(T) / 2) + x /= norm(x) + + howmany = 3 + condA = cond(A) + tol = tolerance(T) # n * condA * (T <: Real ? eps(T) : 4 * eps(real(T))) + alg = Arnoldi(; tol=tol, krylovdim=n) + alg_rrule1 = Arnoldi(; tol=tol, krylovdim=2n, verbosity=0) + alg_rrule2 = GMRES(; tol=tol, krylovdim=n + 1, verbosity=0) + config = Zygote.ZygoteRuleConfig() + @testset for which in whichlist + for alg_rrule in (alg_rrule1, alg_rrule2) + # unfortunately, rrule does not seem type stable for function arguments, because the + # `rrule_via_ad` call does not produce type stable `rrule`s for the function + (vals, vecs, info), pb = ChainRulesCore.rrule(config, eigsolve, A, x, howmany, + which, alg; alg_rrule=alg_rrule) + # NOTE: the following is not necessary here, as it is corrected for in the `eigsolve` rrule + # if length(vals) > howmany && vals[howmany] == conj(vals[howmany + 1]) + # howmany += 1 + # end + @constinferred pb((ZeroTangent(), ZeroTangent(), NoTangent())) + @constinferred pb((randn(T, howmany), ZeroTangent(), NoTangent())) + @constinferred pb((randn(T, howmany), [randn(T, n)], NoTangent())) + @constinferred pb((randn(T, howmany), [randn(T, n) for _ in 1:howmany], + NoTangent())) + end + + for alg_rrule in (alg_rrule1, alg_rrule2) + #! format: off + mat_example, mat_example_fun, mat_example_fd, Avec, xvec, vals, vecs, howmany = + build_mat_example(A, x, howmany, which, alg, alg_rrule) + #! 
format: on + (JA, Jx) = FiniteDifferences.jacobian(fdm, mat_example_fd, Avec, xvec) + (JA1, Jx1) = Zygote.jacobian(mat_example, Avec, xvec) + (JA2, Jx2) = Zygote.jacobian(mat_example_fun, Avec, xvec) + + # finite difference comparison using some kind of tolerance heuristic + @test isapprox(JA, JA1; rtol=condA * sqrt(eps(real(T)))) + @test all(isapprox.(JA1, JA2; atol=n * eps(real(T)))) + @test norm(Jx, Inf) < condA * sqrt(eps(real(T))) + @test all(iszero, Jx1) + @test all(iszero, Jx2) + + # some analysis + ∂vals = complex.(JA1[1:howmany, :], JA1[howmany * (n + 1) .+ (1:howmany), :]) + ∂vecs = map(1:howmany) do i + return complex.(JA1[(howmany + (i - 1) * n) .+ (1:n), :], + JA1[(howmany * (n + 2) + (i - 1) * n) .+ (1:n), :]) + end + if eltype(A) <: Complex # test holomorphicity / Cauchy-Riemann equations + # for eigenvalues + @test real(∂vals[:, 1:2:(2n^2)]) ≈ +imag(∂vals[:, 2:2:(2n^2)]) + @test imag(∂vals[:, 1:2:(2n^2)]) ≈ -real(∂vals[:, 2:2:(2n^2)]) + # and for eigenvectors + for i in 1:howmany + @test real(∂vecs[i][:, 1:2:(2n^2)]) ≈ +imag(∂vecs[i][:, 2:2:(2n^2)]) + @test imag(∂vecs[i][:, 1:2:(2n^2)]) ≈ -real(∂vecs[i][:, 2:2:(2n^2)]) + end + end + # test orthogonality of vecs and ∂vecs + for i in 1:howmany + @test all(isapprox.(abs.(vecs[i]' * ∂vecs[i]), 0; atol=sqrt(eps(real(T))))) + end + end + end + + if T <: Complex + @testset "test warnings and info" begin + alg = Arnoldi(; tol=tol, krylovdim=n, verbosity=0) + alg_rrule = Arnoldi(; tol=tol, krylovdim=n, verbosity=0) + (vals, vecs, info), pb = ChainRulesCore.rrule(config, eigsolve, A, x, howmany, + :LR, alg; alg_rrule=alg_rrule) + @test_logs pb((ZeroTangent(), im .* vecs[1:2] .+ vecs[2:-1:1], NoTangent())) + + alg = Arnoldi(; tol=tol, krylovdim=n, verbosity=0) + alg_rrule = Arnoldi(; tol=tol, krylovdim=n, verbosity=1) + (vals, vecs, info), pb = ChainRulesCore.rrule(config, eigsolve, A, x, howmany, + :LR, alg; alg_rrule=alg_rrule) + @test_logs (:warn,) pb((ZeroTangent(), im .* vecs[1:2] .+ vecs[2:-1:1], + NoTangent())) + pbs = @test_logs pb((ZeroTangent(), vecs[1:2], NoTangent())) + @test norm(unthunk(pbs[1]), Inf) < condA * sqrt(eps(real(T))) + + alg = Arnoldi(; tol=tol, krylovdim=n, verbosity=1) + alg_rrule = Arnoldi(; tol=tol, krylovdim=n, verbosity=2) + (vals, vecs, info), pb = ChainRulesCore.rrule(config, eigsolve, A, x, howmany, + :LR, alg; alg_rrule=alg_rrule) + @test_logs (:warn,) (:info,) pb((ZeroTangent(), im .* vecs[1:2] .+ vecs[2:-1:1], + NoTangent())) + pbs = @test_logs (:info,) pb((ZeroTangent(), vecs[1:2], NoTangent())) + @test norm(unthunk(pbs[1]), Inf) < condA * sqrt(eps(real(T))) + + alg = Arnoldi(; tol=tol, krylovdim=n, verbosity=0) + alg_rrule = GMRES(; tol=tol, krylovdim=n, verbosity=0) + (vals, vecs, info), pb = ChainRulesCore.rrule(config, eigsolve, A, x, howmany, + :LR, alg; alg_rrule=alg_rrule) + @test_logs pb((ZeroTangent(), im .* vecs[1:2] .+ vecs[2:-1:1], NoTangent())) + + alg = Arnoldi(; tol=tol, krylovdim=n, verbosity=0) + alg_rrule = GMRES(; tol=tol, krylovdim=n, verbosity=1) + (vals, vecs, info), pb = ChainRulesCore.rrule(config, eigsolve, A, x, howmany, + :LR, alg; alg_rrule=alg_rrule) + @test_logs (:warn,) (:warn,) pb((ZeroTangent(), + im .* vecs[1:2] .+ + vecs[2:-1:1], + NoTangent())) + pbs = @test_logs pb((ZeroTangent(), vecs[1:2], NoTangent())) + @test norm(unthunk(pbs[1]), Inf) < condA * sqrt(eps(real(T))) + + alg = Arnoldi(; tol=tol, krylovdim=n, verbosity=1) + alg_rrule = GMRES(; tol=tol, krylovdim=n, verbosity=2) + (vals, vecs, info), pb = ChainRulesCore.rrule(config, eigsolve, A, x, howmany, + 
:LR, alg; alg_rrule=alg_rrule) + @test_logs (:warn,) (:info,) (:info,) (:warn,) (:info,) (:info,) pb((ZeroTangent(), + im .* + vecs[1:2] .+ + vecs[2:-1:1], + NoTangent())) + pbs = @test_logs (:info,) (:info,) pb((ZeroTangent(), vecs[1:2], NoTangent())) + @test norm(unthunk(pbs[1]), Inf) < condA * sqrt(eps(real(T))) + end + end +end +@timedtestset "Large eigsolve AD test with eltype=$T" for T in (Float64, ComplexF64) + if T <: Complex + whichlist = (:LM, :SI) + else + whichlist = (:LM, :SR) + end + @testset for which in whichlist + A = rand(T, (N, N)) .- one(T) / 2 + A = I - (9 // 10) * A / maximum(abs, eigvals(A)) + x = 2 * (rand(T, N) .- one(T) / 2) + x /= norm(x) + c = 2 * (rand(T, N) .- one(T) / 2) + d = 2 * (rand(T, N) .- one(T) / 2) + + howmany = 2 + tol = tolerance(T) # 2 * N^2 * eps(real(T)) + alg = Arnoldi(; tol=tol, krylovdim=2n) + alg_rrule1 = Arnoldi(; tol=tol, krylovdim=2n, verbosity=0) + alg_rrule2 = GMRES(; tol=tol, krylovdim=2n, verbosity=0) + @testset for alg_rrule in (alg_rrule1, alg_rrule2) + #! format: off + fun_example, fun_example_fd, Avec, xvec, cvec, dvec, vals, vecs, howmany = + build_fun_example(A, x, c, d, howmany, which, alg, alg_rrule) + #! format: on + + (JA, Jx, Jc, Jd) = FiniteDifferences.jacobian(fdm, fun_example_fd, Avec, xvec, + cvec, dvec) + (JA′, Jx′, Jc′, Jd′) = Zygote.jacobian(fun_example, Avec, xvec, cvec, dvec) + + rtol = cond(A + c * d') * sqrt(eps(real(T))) + @test isapprox(JA, JA′; rtol=rtol) + @test isapprox(Jc, Jc′; rtol=rtol) + @test isapprox(Jd, Jd′; rtol=rtol) + end + end +end +@timedtestset "Large Hermitian eigsolve AD test with eltype=$T" for T in + (Float64, ComplexF64) + whichlist = (:LR, :SR) + @testset for which in whichlist + A = rand(T, (N, N)) .- one(T) / 2 + A = I - (9 // 10) * A / maximum(abs, eigvals(A)) + x = 2 * (rand(T, N) .- one(T) / 2) + x /= norm(x) + c = 2 * (rand(T, N) .- one(T) / 2) + + howmany = 2 + tol = tolerance(T) + alg = Lanczos(; tol=tol, krylovdim=2n) + alg_rrule1 = Arnoldi(; tol=tol, krylovdim=2n, verbosity=0) + alg_rrule2 = GMRES(; tol=tol, krylovdim=2n, verbosity=0) + @testset for alg_rrule in (alg_rrule1, alg_rrule2) + #! format: off + fun_example, fun_example_fd, Avec, xvec, cvec, vals, vecs, howmany = + build_hermitianfun_example(A, x, c, howmany, which, alg, alg_rrule) + #! 
format: on + + (JA, Jx, Jc) = FiniteDifferences.jacobian(fdm, fun_example_fd, Avec, xvec, + cvec) + (JA′, Jx′, Jc′) = Zygote.jacobian(fun_example, Avec, xvec, cvec) + rtol = cond(A + c * c') * sqrt(eps(real(T))) + @test isapprox(JA, JA′; rtol=rtol) + @test isapprox(Jc, Jc′; rtol=rtol) + end + end +end + +end diff --git a/test/ad/linsolve.jl b/test/ad/linsolve.jl new file mode 100644 index 00000000..a29f5e45 --- /dev/null +++ b/test/ad/linsolve.jl @@ -0,0 +1,146 @@ +module LinsolveAD +using KrylovKit, LinearAlgebra +using Random, Test, TestExtras +using ChainRulesCore, ChainRulesTestUtils, Zygote, FiniteDifferences +using ..TestSetup + +fdm = ChainRulesTestUtils._fdm +n = 10 +N = 30 + +function build_mat_example(A, b, x, alg, alg_rrule) + Avec, A_fromvec = to_vec(A) + bvec, b_fromvec = to_vec(b) + xvec, x_fromvec = to_vec(x) + T = eltype(A) + + function mat_example(Av, bv, xv) + à = A_fromvec(Av) + b̃ = b_fromvec(bv) + x̃ = x_fromvec(xv) + x, info = linsolve(Ã, b̃, x̃, alg; alg_rrule=alg_rrule) + if info.converged == 0 + @warn "linsolve did not converge:" + println("normres = ", info.normres) + end + xv, = to_vec(x) + return xv + end + function mat_example_fun(Av, bv, xv) + à = A_fromvec(Av) + b̃ = b_fromvec(bv) + x̃ = x_fromvec(xv) + f = x -> à * x + x, info = linsolve(f, b̃, x̃, alg; alg_rrule=alg_rrule) + if info.converged == 0 + @warn "linsolve did not converge:" + println("normres = ", info.normres) + end + xv, = to_vec(x) + return xv + end + return mat_example, mat_example_fun, Avec, bvec, xvec +end + +function testfun(A, x, c, d) + return A * x + c * dot(d, x) +end +testfunthunk(A, x, c, d) = testfun(A, x, c, d) +function ChainRulesCore.rrule(config::RuleConfig{>:HasReverseMode}, ::typeof(testfunthunk), + args...) + y = testfunthunk(args...) + function thunkedpb(dy) + pb = rrule_via_ad(config, testfun, args...)[2] + return map(z -> @thunk(z), pb(dy)) + end + return y, thunkedpb +end + +function build_fun_example(A, b, c, d, e, f, alg, alg_rrule) + Avec, matfromvec = to_vec(A) + bvec, vecfromvec = to_vec(b) + cvec, = to_vec(c) + dvec, = to_vec(d) + evec, scalarfromvec = to_vec(e) + fvec, = to_vec(f) + + function fun_example(Av, bv, cv, dv, ev, fv) + à = matfromvec(Av) + b̃ = vecfromvec(bv) + c̃ = vecfromvec(cv) + d̃ = vecfromvec(dv) + ẽ = scalarfromvec(ev) + f̃ = scalarfromvec(fv) + + x, info = linsolve(b̃, zero(b̃), alg, ẽ, f̃; alg_rrule=alg_rrule) do y + return testfunthunk(Ã, y, c̃, d̃) + end + # info.converged > 0 || @warn "not converged" + xv, = to_vec(x) + return xv + end + return fun_example, Avec, bvec, cvec, dvec, evec, fvec +end + +@testset "Small linsolve AD test with eltype=$T" for T in (Float32, Float64, ComplexF32, + ComplexF64) + A = 2 * (rand(T, (n, n)) .- one(T) / 2) + b = 2 * (rand(T, n) .- one(T) / 2) + b /= norm(b) + x = 2 * (rand(T, n) .- one(T) / 2) + + condA = cond(A) + tol = tolerance(T) #condA * (T <: Real ? 
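# Aside: testfunthunk above deliberately wraps every cotangent returned by the
# pullback in a @thunk, so the tests exercise that KrylovKit's reverse rules unthunk
# incoming gradients before using them. The mechanism in isolation (a sketch using
# only ChainRulesCore, which these tests already load):
using ChainRulesCore
t = @thunk(2 .* [1.0, 2.0])   # lazy: the computation runs only on demand
@assert t isa ChainRulesCore.AbstractThunk
@assert unthunk(t) == [2.0, 4.0]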
eps(T) : 4 * eps(real(T))) + alg = GMRES(; tol=tol, krylovdim=n, maxiter=1) + + config = Zygote.ZygoteRuleConfig() + _, pb = ChainRulesCore.rrule(config, linsolve, A, b, x, alg, 0, 1; alg_rrule=alg) + @constinferred pb((ZeroTangent(), NoTangent())) + @constinferred pb((rand(T, n), NoTangent())) + + mat_example, mat_example_fun, Avec, bvec, xvec = build_mat_example(A, b, x, alg, alg) + (JA, Jb, Jx) = FiniteDifferences.jacobian(fdm, mat_example, Avec, bvec, xvec) + (JA1, Jb1, Jx1) = Zygote.jacobian(mat_example, Avec, bvec, xvec) + (JA2, Jb2, Jx2) = Zygote.jacobian(mat_example_fun, Avec, bvec, xvec) + + @test isapprox(JA, JA1; rtol=condA * sqrt(eps(real(T)))) + @test all(isapprox.(JA1, JA2; atol=n * eps(real(T)))) + # factor 2 is minimally necessary for complex case, but 3 is more robust + @test norm(Jx, Inf) < condA * sqrt(eps(real(T))) + @test all(iszero, Jx1) +end + +@testset "Large linsolve AD test with eltype=$T" for T in (Float64, ComplexF64) + A = rand(T, (N, N)) .- one(T) / 2 + A = I - (9 // 10) * A / maximum(abs, eigvals(A)) + b = 2 * (rand(T, N) .- one(T) / 2) + c = 2 * (rand(T, N) .- one(T) / 2) + d = 2 * (rand(T, N) .- one(T) / 2) + e = rand(T) + f = rand(T) + + # mix algorithms + tol = tolerance(T) # N^2 * eps(real(T)) + alg1 = GMRES(; tol=tol, krylovdim=20) + alg2 = BiCGStab(; tol=tol, maxiter=100) # BiCGStab seems to require slightly smaller tolerance for tests to work + for (alg, alg_rrule) in ((alg1, alg2), (alg2, alg1)) + #! format: off + fun_example, Avec, bvec, cvec, dvec, evec, fvec = + build_fun_example(A, b, c, d, e, f, alg, alg_rrule) + #! format: on + + (JA, Jb, Jc, Jd, Je, Jf) = FiniteDifferences.jacobian(fdm, fun_example, + Avec, bvec, cvec, dvec, evec, + fvec) + (JA′, Jb′, Jc′, Jd′, Je′, Jf′) = Zygote.jacobian(fun_example, Avec, bvec, cvec, + dvec, evec, fvec) + rtol = 2 * cond(A + c * d') * sqrt(eps(real(T))) + @test isapprox(JA, JA′; rtol=rtol) + @test isapprox(Jb, Jb′; rtol=rtol) + @test isapprox(Jc, Jc′; rtol=rtol) + @test isapprox(Jd, Jd′; rtol=rtol) + @test isapprox(Je, Je′; rtol=rtol) + @test isapprox(Jf, Jf′; rtol=rtol) + end +end +end diff --git a/test/ad/svdsolve.jl b/test/ad/svdsolve.jl new file mode 100644 index 00000000..bf6ec38f --- /dev/null +++ b/test/ad/svdsolve.jl @@ -0,0 +1,381 @@ +module SvdsolveAD +using KrylovKit, LinearAlgebra +using Random, Test, TestExtras +using ChainRulesCore, ChainRulesTestUtils, Zygote, FiniteDifferences +Random.seed!(123456789) + +fdm = ChainRulesTestUtils._fdm +n = 10 +N = 30 + +function build_mat_example(A, x, howmany::Int, alg, alg_rrule) + Avec, A_fromvec = to_vec(A) + xvec, x_fromvec = to_vec(x) + + vals, lvecs, rvecs, info = svdsolve(A, x, howmany, :LR, alg) + info.converged < howmany && @warn "svdsolve did not converge" + + function mat_example_mat(Av, xv) + Ã = A_fromvec(Av) + x̃ = x_fromvec(xv) + vals′, lvecs′, rvecs′, info′ = svdsolve(Ã, x̃, howmany, :LR, alg; + alg_rrule=alg_rrule) + info′.converged < howmany && @warn "svdsolve did not converge" + catresults = vcat(vals′[1:howmany], lvecs′[1:howmany]..., rvecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + function mat_example_fval(Av, xv) + Ã = A_fromvec(Av) + x̃ = x_fromvec(xv) + f = (x, adj::Val) -> (adj isa Val{true}) ? 
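# Aside: the trailing scalars in calls like linsolve(A, b, x, alg, 0, 1) and
# linsolve(f, b̃, zero(b̃), alg, ẽ, f̃) follow linsolve's generalized-problem
# convention: linsolve(f, b, x₀, alg, a₀, a₁) solves (a₀ + a₁ * f)(x) = b rather than
# f(x) = b. A minimal dense check of that convention (a sketch; sizes and
# coefficients chosen arbitrarily):
using KrylovKit, LinearAlgebra
Ad = I + 0.1 * randn(10, 10)
bd = randn(10)
xd, infod = linsolve(Ad, bd, zero(bd), GMRES(; tol=1e-12, krylovdim=10, maxiter=10), 2.0, 3.0)
@assert infod.converged > 0 && (2.0 * I + 3.0 * Ad) * xd ≈ bd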
adjoint(Ã) * x : à * x + vals′, lvecs′, rvecs′, info′ = svdsolve(f, x̃, howmany, :LR, alg; + alg_rrule=alg_rrule) + info′.converged < howmany && @warn "svdsolve did not converge" + catresults = vcat(vals′[1:howmany], lvecs′[1:howmany]..., rvecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + function mat_example_ftuple(Av, xv) + à = A_fromvec(Av) + x̃ = x_fromvec(xv) + (f, fᴴ) = (x -> à * x, x -> adjoint(Ã) * x) + vals′, lvecs′, rvecs′, info′ = svdsolve((f, fᴴ), x̃, howmany, :LR, alg; + alg_rrule=alg_rrule) + info′.converged < howmany && @warn "svdsolve did not converge" + catresults = vcat(vals′[1:howmany], lvecs′[1:howmany]..., rvecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + + function mat_example_fd(Av, xv) + à = A_fromvec(Av) + x̃ = x_fromvec(xv) + vals′, lvecs′, rvecs′, info′ = svdsolve(Ã, x̃, howmany, :LR, alg; + alg_rrule=alg_rrule) + info′.converged < howmany && @warn "svdsolve did not converge" + for i in 1:howmany + dl = dot(lvecs[i], lvecs′[i]) + dr = dot(rvecs[i], rvecs′[i]) + @assert abs(dl) > sqrt(eps(real(eltype(A)))) + @assert abs(dr) > sqrt(eps(real(eltype(A)))) + phasefix = sqrt(abs(dl * dr) / (dl * dr)) + lvecs′[i] = lvecs′[i] * phasefix + rvecs′[i] = rvecs′[i] * phasefix + end + catresults = vcat(vals′[1:howmany], lvecs′[1:howmany]..., rvecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + + return mat_example_mat, mat_example_ftuple, mat_example_fval, mat_example_fd, Avec, + xvec, vals, lvecs, rvecs +end + +function build_fun_example(A, x, c, d, howmany::Int, alg, alg_rrule) + Avec, matfromvec = to_vec(A) + xvec, xvecfromvec = to_vec(x) + cvec, cvecfromvec = to_vec(c) + dvec, dvecfromvec = to_vec(d) + + f = y -> A * y + c * dot(d, y) + fᴴ = y -> adjoint(A) * y + d * dot(c, y) + vals, lvecs, rvecs, info = svdsolve((f, fᴴ), x, howmany, :LR, alg) + info.converged < howmany && @warn "svdsolve did not converge" + + function fun_example_ad(Av, xv, cv, dv) + à = matfromvec(Av) + x̃ = xvecfromvec(xv) + c̃ = cvecfromvec(cv) + d̃ = dvecfromvec(dv) + + f = y -> à * y + c̃ * dot(d̃, y) + fᴴ = y -> adjoint(Ã) * y + d̃ * dot(c̃, y) + vals′, lvecs′, rvecs′, info′ = svdsolve((f, fᴴ), x̃, howmany, :LR, alg; + alg_rrule=alg_rrule) + info′.converged < howmany && @warn "svdsolve did not converge" + catresults = vcat(vals′[1:howmany], lvecs′[1:howmany]..., rvecs′[1:howmany]...) + if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + function fun_example_fd(Av, xv, cv, dv) + à = matfromvec(Av) + x̃ = xvecfromvec(xv) + c̃ = cvecfromvec(cv) + d̃ = dvecfromvec(dv) + + f = y -> à * y + c̃ * dot(d̃, y) + fᴴ = y -> adjoint(Ã) * y + d̃ * dot(c̃, y) + vals′, lvecs′, rvecs′, info′ = svdsolve((f, fᴴ), x̃, howmany, :LR, alg; + alg_rrule=alg_rrule) + info′.converged < howmany && @warn "svdsolve did not converge" + for i in 1:howmany + dl = dot(lvecs[i], lvecs′[i]) + dr = dot(rvecs[i], rvecs′[i]) + @assert abs(dl) > sqrt(eps(real(eltype(A)))) + @assert abs(dr) > sqrt(eps(real(eltype(A)))) + phasefix = sqrt(abs(dl * dr) / (dl * dr)) + lvecs′[i] = lvecs′[i] * phasefix + rvecs′[i] = rvecs′[i] * phasefix + end + catresults = vcat(vals′[1:howmany], lvecs′[1:howmany]..., rvecs′[1:howmany]...) 
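# Aside: the phasefix manipulation above removes the gauge freedom of singular
# triplets: A*(v*φ) == σ*(u*φ) for any unit-modulus φ, so the recomputed vectors must
# be rotated back onto the reference ones before taking finite differences, or the FD
# Jacobian would pick up the arbitrary phase jump between evaluations. The alignment
# step in isolation (a sketch):
using LinearAlgebra
u = normalize(randn(ComplexF64, 4))
u2 = u * cis(0.7)                 # same singular vector, different gauge
d = dot(u, u2)                    # equals cis(0.7) here
phasefix = sqrt(abs(d^2) / d^2)   # unit modulus, cancels the gauge up to sign
@assert u2 * phasefix ≈ u || u2 * phasefix ≈ -u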
+ if eltype(catresults) <: Complex + return vcat(real(catresults), imag(catresults)) + else + return catresults + end + end + return fun_example_ad, fun_example_fd, Avec, xvec, cvec, dvec, vals, lvecs, rvecs +end + +@timedtestset "Small svdsolve AD test with eltype=$T" for T in + (Float32, Float64, ComplexF32, + ComplexF64) + A = 2 * (rand(T, (n, 2 * n)) .- one(T) / 2) + x = 2 * (rand(T, n) .- one(T) / 2) + x /= norm(x) + condA = cond(A) + + howmany = 3 + tol = 3 * n * condA * (T <: Real ? eps(T) : 4 * eps(real(T))) + alg = GKL(; krylovdim=2n, tol=tol) + alg_rrule1 = Arnoldi(; tol=tol, krylovdim=4n, verbosity=0) + alg_rrule2 = GMRES(; tol=tol, krylovdim=3n, verbosity=0) + config = Zygote.ZygoteRuleConfig() + for alg_rrule in (alg_rrule1, alg_rrule2) + # unfortunately, rrule does not seem type stable for function arguments, because the + # `rrule_via_ad` call does not produce type stable `rrule`s for the function + _, pb = ChainRulesCore.rrule(config, svdsolve, A, x, howmany, :LR, alg; + alg_rrule=alg_rrule) + @constinferred pb((ZeroTangent(), ZeroTangent(), ZeroTangent(), NoTangent())) + @constinferred pb((randn(real(T), howmany), ZeroTangent(), ZeroTangent(), + NoTangent())) + @constinferred pb((randn(real(T), howmany), [randn(T, n)], ZeroTangent(), + NoTangent())) + @constinferred pb((randn(real(T), howmany), [randn(T, n) for _ in 1:howmany], + [randn(T, 2 * n) for _ in 1:howmany], NoTangent())) + end + for alg_rrule in (alg_rrule1, alg_rrule2) + (mat_example_mat, mat_example_ftuple, mat_example_fval, mat_example_fd, + Avec, xvec, vals, lvecs, rvecs) = build_mat_example(A, x, howmany, alg, alg_rrule) + + (JA, Jx) = FiniteDifferences.jacobian(fdm, mat_example_fd, Avec, xvec) + (JA1, Jx1) = Zygote.jacobian(mat_example_mat, Avec, xvec) + (JA2, Jx2) = Zygote.jacobian(mat_example_fval, Avec, xvec) + (JA3, Jx3) = Zygote.jacobian(mat_example_ftuple, Avec, xvec) + + # finite difference comparison using some kind of tolerance heuristic + @test isapprox(JA, JA1; rtol=3 * n * n * condA * sqrt(eps(real(T)))) + @test all(isapprox.(JA1, JA2; atol=n * eps(real(T)))) + @test all(isapprox.(JA1, JA3; atol=n * eps(real(T)))) + @test norm(Jx, Inf) < 5 * condA * sqrt(eps(real(T))) + @test all(iszero, Jx1) + @test all(iszero, Jx2) + @test all(iszero, Jx3) + + # some analysis + if eltype(A) <: Complex # test holomorphicity / Cauchy-Riemann equations + ∂vals = complex.(JA1[1:howmany, :], + JA1[howmany * (3 * n + 1) .+ (1:howmany), :]) + ∂lvecs = map(1:howmany) do i + return complex.(JA1[(howmany + (i - 1) * n) .+ (1:n), :], + JA1[(howmany * (3 * n + 2) + (i - 1) * n) .+ (1:n), :]) + end + ∂rvecs = map(1:howmany) do i + return complex.(JA1[(howmany * (n + 1) + (i - 1) * (2 * n)) .+ (1:(2n)), :], + JA1[(howmany * (4 * n + 2) + (i - 1) * 2n) .+ (1:(2n)), :]) + end + else + ∂vals = JA1[1:howmany, :] + ∂lvecs = map(1:howmany) do i + return JA1[(howmany + (i - 1) * n) .+ (1:n), :] + end + ∂rvecs = map(1:howmany) do i + return JA1[(howmany * (n + 1) + (i - 1) * (2 * n)) .+ (1:(2n)), :] + end + end + # test orthogonality of vecs and ∂vecs + for i in 1:howmany + prec = 4 * cond(A) * sqrt(eps(real(T))) + @test all(<(prec), real.(lvecs[i]' * ∂lvecs[i])) + @test all(<(prec), real.(rvecs[i]' * ∂rvecs[i])) + @test all(<(prec), abs.(lvecs[i]' * ∂lvecs[i] + rvecs[i]' * ∂rvecs[i])) + end + end + if T <: Complex + @testset "test warnings and info" begin + alg = GKL(; krylovdim=2n, tol=tol, verbosity=0) + alg_rrule = Arnoldi(; tol=tol, krylovdim=4n, verbosity=0) + (vals, lvecs, rvecs, info), pb = ChainRulesCore.rrule(config, 
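# Aside: the three variants above cover the operator conventions svdsolve accepts:
# a matrix (mat_example_mat), a single two-argument function of (x, ::Val) selecting
# the adjoint (mat_example_fval), or a tuple (f, fᴴ) of the map and its adjoint
# (mat_example_ftuple). Minimal dense-backed use of the tuple form (a sketch; at this
# size the default GKL algorithm should converge easily):
using KrylovKit, LinearAlgebra
Ad = randn(8, 12)
fd_map = x -> Ad * x
fd_adj = y -> Ad' * y
valsd, lvecsd, rvecsd, infod = svdsolve((fd_map, fd_adj), randn(8), 2, :LR)
@assert infod.converged >= 2 && valsd[1] ≈ svdvals(Ad)[1]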
svdsolve, A, x, + howmany, :LR, alg; + alg_rrule=alg_rrule) + @test_logs pb((ZeroTangent(), im .* lvecs[1:2] .+ lvecs[2:-1:1], ZeroTangent(), + NoTangent())) + + alg = GKL(; krylovdim=2n, tol=tol, verbosity=1) + alg_rrule = Arnoldi(; tol=tol, krylovdim=4n, verbosity=1) + (vals, lvecs, rvecs, info), pb = ChainRulesCore.rrule(config, svdsolve, A, x, + howmany, :LR, alg; + alg_rrule=alg_rrule) + @test_logs (:warn,) pb((ZeroTangent(), + im .* lvecs[1:2] .+ lvecs[2:-1:1], + ZeroTangent(), + NoTangent())) + @test_logs (:warn,) pb((ZeroTangent(), lvecs[2:-1:1], + im .* rvecs[1:2] .+ rvecs[2:-1:1], + ZeroTangent(), + NoTangent())) + @test_logs pb((ZeroTangent(), lvecs[1:2] .+ lvecs[2:-1:1], + ZeroTangent(), + NoTangent())) + @test_logs (:warn,) pb((ZeroTangent(), + im .* lvecs[1:2] .+ lvecs[2:-1:1], + +im .* rvecs[1:2] + rvecs[2:-1:1], + NoTangent())) + @test_logs pb((ZeroTangent(), (1 + im) .* lvecs[1:2] .+ lvecs[2:-1:1], + (1 - im) .* rvecs[1:2] + rvecs[2:-1:1], + NoTangent())) + + alg = GKL(; krylovdim=2n, tol=tol, verbosity=1) + alg_rrule = Arnoldi(; tol=tol, krylovdim=4n, verbosity=2) + (vals, lvecs, rvecs, info), pb = ChainRulesCore.rrule(config, svdsolve, A, x, + howmany, :LR, alg; + alg_rrule=alg_rrule) + @test_logs (:warn,) (:info,) pb((ZeroTangent(), + im .* lvecs[1:2] .+ lvecs[2:-1:1], + ZeroTangent(), + NoTangent())) + @test_logs (:warn,) (:info,) pb((ZeroTangent(), lvecs[2:-1:1], + im .* rvecs[1:2] .+ rvecs[2:-1:1], + ZeroTangent(), + NoTangent())) + @test_logs (:info,) pb((ZeroTangent(), lvecs[1:2] .+ lvecs[2:-1:1], + ZeroTangent(), + NoTangent())) + @test_logs (:warn,) (:info,) pb((ZeroTangent(), + im .* lvecs[1:2] .+ lvecs[2:-1:1], + +im .* rvecs[1:2] + rvecs[2:-1:1], + NoTangent())) + @test_logs (:info,) pb((ZeroTangent(), (1 + im) .* lvecs[1:2] .+ lvecs[2:-1:1], + (1 - im) .* rvecs[1:2] + rvecs[2:-1:1], + NoTangent())) + + alg = GKL(; krylovdim=2n, tol=tol, verbosity=0) + alg_rrule = GMRES(; tol=tol, krylovdim=3n, verbosity=0) + (vals, lvecs, rvecs, info), pb = ChainRulesCore.rrule(config, svdsolve, A, x, + howmany, :LR, alg; + alg_rrule=alg_rrule) + @test_logs pb((ZeroTangent(), im .* lvecs[1:2] .+ lvecs[2:-1:1], ZeroTangent(), + NoTangent())) + + alg = GKL(; krylovdim=2n, tol=tol, verbosity=1) + alg_rrule = GMRES(; tol=tol, krylovdim=3n, verbosity=1) + (vals, lvecs, rvecs, info), pb = ChainRulesCore.rrule(config, svdsolve, A, x, + howmany, :LR, alg; + alg_rrule=alg_rrule) + @test_logs (:warn,) (:warn,) pb((ZeroTangent(), + im .* lvecs[1:2] .+ + lvecs[2:-1:1], ZeroTangent(), + NoTangent())) + @test_logs (:warn,) (:warn,) pb((ZeroTangent(), lvecs[2:-1:1], + im .* rvecs[1:2] .+ + rvecs[2:-1:1], ZeroTangent(), + NoTangent())) + @test_logs pb((ZeroTangent(), lvecs[1:2] .+ lvecs[2:-1:1], + ZeroTangent(), + NoTangent())) + @test_logs (:warn,) (:warn,) pb((ZeroTangent(), + im .* lvecs[1:2] .+ + lvecs[2:-1:1], + +im .* rvecs[1:2] + + rvecs[2:-1:1], + NoTangent())) + @test_logs pb((ZeroTangent(), + (1 + im) .* lvecs[1:2] .+ lvecs[2:-1:1], + (1 - im) .* rvecs[1:2] + rvecs[2:-1:1], + NoTangent())) + + alg = GKL(; krylovdim=2n, tol=tol, verbosity=1) + alg_rrule = GMRES(; tol=tol, krylovdim=3n, verbosity=2) + (vals, lvecs, rvecs, info), pb = ChainRulesCore.rrule(config, svdsolve, A, x, + howmany, :LR, alg; + alg_rrule=alg_rrule) + @test_logs (:warn,) (:info,) (:info,) (:warn,) (:info,) (:info,) pb((ZeroTangent(), + im .* + lvecs[1:2] .+ + lvecs[2:-1:1], + ZeroTangent(), + NoTangent())) + @test_logs (:warn,) (:info,) (:info,) (:warn,) (:info,) (:info,) pb((ZeroTangent(), + lvecs[2:-1:1], + im 
.* + rvecs[1:2] .+ + rvecs[2:-1:1], + ZeroTangent(), + NoTangent())) + @test_logs (:info,) (:info,) (:info,) (:info,) pb((ZeroTangent(), + lvecs[1:2] .+ lvecs[2:-1:1], + ZeroTangent(), + NoTangent())) + @test_logs (:warn,) (:info,) (:info,) (:warn,) (:info,) (:info,) pb((ZeroTangent(), + im .* + lvecs[1:2] .+ + lvecs[2:-1:1], + +im .* + rvecs[1:2] + + rvecs[2:-1:1], + NoTangent())) + @test_logs (:info,) (:info,) (:info,) (:info,) pb((ZeroTangent(), + (1 + im) .* lvecs[1:2] .+ + lvecs[2:-1:1], + (1 - im) .* rvecs[1:2] + + rvecs[2:-1:1], + NoTangent())) + end + end +end +@timedtestset "Large svdsolve AD test with eltype=$T" for T in (Float64, ComplexF64) + which = :LR + A = rand(T, (N, N + n)) .- one(T) / 2 + A = I[1:N, 1:(N + n)] - (9 // 10) * A / maximum(svdvals(A)) + x = 2 * (rand(T, N) .- one(T) / 2) + x /= norm(x) + c = 2 * (rand(T, N) .- one(T) / 2) + d = 2 * (rand(T, N + n) .- one(T) / 2) + + howmany = 2 + tol = 2 * N^2 * eps(real(T)) + alg = GKL(; tol=tol, krylovdim=2n) + alg_rrule1 = Arnoldi(; tol=tol, krylovdim=2n, verbosity=-1) + alg_rrule2 = GMRES(; tol=tol, krylovdim=2n, verbosity=-1) + for alg_rrule in (alg_rrule1, alg_rrule2) + #! format: off + fun_example_ad, fun_example_fd, Avec, xvec, cvec, dvec, vals, lvecs, rvecs = + build_fun_example(A, x, c, d, howmany, alg, alg_rrule) + #! format: on + + (JA, Jx, Jc, Jd) = FiniteDifferences.jacobian(fdm, fun_example_fd, Avec, xvec, + cvec, dvec) + (JA′, Jx′, Jc′, Jd′) = Zygote.jacobian(fun_example_ad, Avec, xvec, cvec, dvec) + @test JA ≈ JA′ + @test Jc ≈ Jc′ + @test Jd ≈ Jd′ + @test norm(Jx, Inf) < (T <: Complex ? 4n : n) * sqrt(eps(real(T))) + end +end +end diff --git a/test/eigsolve.jl b/test/eigsolve.jl index 717d95eb..1e2aa209 100644 --- a/test/eigsolve.jl +++ b/test/eigsolve.jl @@ -8,15 +8,38 @@ A = (A + A') / 2 v = rand(T, (n,)) n1 = div(n, 2) - D1, V1, info = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, :SR; - krylovdim=n, - maxiter=1, tol=tolerance(T), verbosity=2) + alg = Lanczos(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T), + verbosity=2) + D1, V1, info = @test_logs (:info,) eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n1, :SR, alg) + alg = Lanczos(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T), + verbosity=1) + @test_logs eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n1, :SR, alg) + alg = Lanczos(; orth=orth, krylovdim=n1 + 1, maxiter=1, tol=tolerance(T), + verbosity=1) + @test_logs (:warn,) eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n1, :SR, alg) + alg = Lanczos(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T), + verbosity=2) + @test_logs (:info,) eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n1, :SR, alg) + alg = Lanczos(; orth=orth, krylovdim=n1, maxiter=3, tol=tolerance(T), + verbosity=3) + @test_logs((:info,), (:info,), (:info,), (:warn,), + eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), 1, :SR, alg)) + alg = Lanczos(; orth=orth, krylovdim=4, maxiter=1, tol=tolerance(T), + verbosity=4) + # since it is impossible to know exactly the size of the Krylov subspace after shrinking, + # we only know the output for a single iteration + @test_logs((:info,), (:info,), (:info,), (:info,), (:info,), (:warn,), + eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), 1, :SR, alg)) + @test KrylovKit.eigselector(wrapop(A, Val(mode)), scalartype(v); krylovdim=n, maxiter=1, tol=tolerance(T), ishermitian=true) isa Lanczos n2 = n - n1 - alg = Lanczos(; krylovdim=2 * n, maxiter=1, tol=tolerance(T), - verbosity=1) + alg = Lanczos(; krylovdim=2 * n, maxiter=1, 
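# Aside: the verbosity tests here and below all rely on the same two @test_logs
# idioms: a pattern list pins down the exact sequence of log records the call emits,
# while the min_level = Logging.Warn form asserts that nothing at warning level or
# above escapes, ignoring info records. In isolation (a sketch):
using Test, Logging
noisy() = (@warn "w"; @info "i"; 42)
@test_logs (:warn,) (:info,) noisy()           # exactly one warning, then one info
quiet() = (@info "i"; 1)
@test_logs min_level = Logging.Warn quiet()    # no records at Warn or above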
tol=tolerance(T)) D2, V2, info = @constinferred eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n2, :LR, alg) @@ -30,9 +53,10 @@ @test A * U1 ≈ U1 * Diagonal(D1) @test A * U2 ≈ U2 * Diagonal(D2) - _ = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n + 1, :LM; - krylovdim=2n, - maxiter=1, tol=tolerance(T), verbosity=0) + alg = Lanczos(; orth=orth, krylovdim=2n, maxiter=1, tol=tolerance(T), + verbosity=1) + @test_logs (:warn,) (:warn,) eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n + 1, :LM, alg) end end end @@ -47,7 +71,7 @@ end A = (A + A') / 2 v = rand(T, (N,)) alg = Lanczos(; krylovdim=2 * n, maxiter=10, - tol=tolerance(T), eager=true) + tol=tolerance(T), eager=true, verbosity=0) D1, V1, info1 = @constinferred eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, :SR, alg) D2, V2, info2 = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, :LR, @@ -82,15 +106,40 @@ end A = rand(T, (n, n)) .- one(T) / 2 v = rand(T, (n,)) n1 = div(n, 2) - D1, V1, info1 = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, :SR; - orth=orth, krylovdim=n, - maxiter=1, tol=tolerance(T), verbosity=2) + alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T)) + D1, V1, info1 = @constinferred eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n1, :SR, alg) + + alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T), + verbosity=0) + @test_logs eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, :SR, alg) + alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T), + verbosity=1) + @test_logs eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, :SR, alg) + alg = Arnoldi(; orth=orth, krylovdim=n1 + 2, maxiter=1, tol=tolerance(T), + verbosity=1) + @test_logs (:warn,) eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, + :SR, alg) + alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T), + verbosity=2) + @test_logs (:info,) eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, + :SR, alg) + alg = Arnoldi(; orth=orth, krylovdim=n1, maxiter=3, tol=tolerance(T), + verbosity=3) + @test_logs((:info,), (:info,), (:info,), (:warn,), + eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), 1, :SR, alg)) + alg = Arnoldi(; orth=orth, krylovdim=4, maxiter=1, tol=tolerance(T), + verbosity=4) + # since it is impossible to know exactly the size of the Krylov subspace after shrinking, + # we only know the output for a single iteration + @test_logs((:info,), (:info,), (:info,), (:info,), (:info,), (:warn,), + eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), 1, :SR, alg)) + @test KrylovKit.eigselector(wrapop(A, Val(mode)), eltype(v); orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T)) isa Arnoldi n2 = n - n1 - alg = Arnoldi(; orth=orth, krylovdim=2 * n, maxiter=1, tol=tolerance(T), - verbosity=1) + alg = Arnoldi(; orth=orth, krylovdim=2 * n, maxiter=1, tol=tolerance(T)) D2, V2, info2 = @constinferred eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n2, :LR, alg) D = sort(sort(eigvals(A); by=imag, rev=true); alg=MergeSort, by=real) @@ -121,9 +170,10 @@ end @test A * U2 ≈ U2 * Diagonal(D2) end - _ = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n + 1, :LM; orth=orth, - krylovdim=2n, - maxiter=1, tol=tolerance(T), verbosity=0) + alg = Arnoldi(; orth=orth, krylovdim=2n, maxiter=1, tol=tolerance(T), + verbosity=1) + @test_logs (:warn,) (:warn,) eigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n + 1, :LM, alg) end end end @@ -137,7 +187,7 @@ end A = rand(T, (N, N)) .- one(T) / 2 v = rand(T, (N,)) alg = 
Arnoldi(; krylovdim=3 * n, maxiter=20, - tol=tolerance(T), eager=true) + tol=tolerance(T), eager=true, verbosity=0) D1, V1, info1 = @constinferred eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, :SR, alg) D2, V2, info2 = eigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, :LR, @@ -192,3 +242,97 @@ end end end end + +@testset "Arnoldi - realeigsolve iteratively ($mode)" for mode in + (:vector, :inplace, :outplace) + scalartypes = mode === :vector ? (Float32, Float64) : (Float64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths + V = exp(randn(T, (N, N)) / 10) + D = randn(T, N) + A = V * Diagonal(D) / V + v = rand(T, (N,)) + alg = Arnoldi(; krylovdim=3 * n, maxiter=20, + tol=tolerance(T), eager=true, verbosity=0) + D1, V1, info1 = @constinferred realeigsolve(wrapop(A, Val(mode)), + wrapvec(v, Val(mode)), n, :SR, alg) + D2, V2, info2 = realeigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, + :LR, + alg) + D3, V3, info3 = realeigsolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n, + :LM, + alg) + l1 = info1.converged + l2 = info2.converged + l3 = info3.converged + @test l1 > 0 + @test l2 > 0 + @test l3 > 0 + @test D1[1:l1] ≊ sort(D; alg=MergeSort)[1:l1] + @test D2[1:l2] ≊ sort(D; alg=MergeSort, rev=true)[1:l2] + # sorting by abs does not seem very reliable if two distinct eigenvalues are close + # in absolute value, so we perform a second sort afterwards using the real part + @test D3[1:l3] ≊ sort(D; by=abs, rev=true)[1:l3] + + @test eltype(D1) == T + @test eltype(D2) == T + @test eltype(D3) == T + + U1 = stack(unwrapvec, V1) + U2 = stack(unwrapvec, V2) + U3 = stack(unwrapvec, V3) + R1 = stack(unwrapvec, info1.residual) + R2 = stack(unwrapvec, info2.residual) + R3 = stack(unwrapvec, info3.residual) + @test A * U1 ≈ U1 * Diagonal(D1) + R1 + @test A * U2 ≈ U2 * Diagonal(D2) + R2 + @test A * U3 ≈ U3 * Diagonal(D3) + R3 + + if mode == :vector # solve eigenvalue problem as complex problem with real linear operator + V = exp(randn(T, (2N, 2N)) / 10) + D = randn(T, 2N) + Ar = V * Diagonal(D) / V + Z = zeros(T, N, N) + J = [Z -I; I Z] + Ar1 = (Ar - J * Ar * J) / 2 + Ar2 = (Ar + J * Ar * J) / 2 + A = complex.(Ar1[1:N, 1:N], -Ar1[1:N, (N + 1):end]) + B = complex.(Ar2[1:N, 1:N], +Ar2[1:N, (N + 1):end]) + f = buildrealmap(A, B) + v = rand(complex(T), (N,)) + alg = Arnoldi(; krylovdim=3 * n, maxiter=20, + tol=tolerance(T), eager=true, verbosity=0) + D1, V1, info1 = @constinferred realeigsolve(f, v, n, :SR, alg) + D2, V2, info2 = realeigsolve(f, v, n, :LR, alg) + D3, V3, info3 = realeigsolve(f, v, n, :LM, alg) + + l1 = info1.converged + l2 = info2.converged + l3 = info3.converged + @test l1 > 0 + @test l2 > 0 + @test l3 > 0 + @test D1[1:l1] ≊ sort(D; alg=MergeSort)[1:l1] + @test D2[1:l2] ≊ sort(D; alg=MergeSort, rev=true)[1:l2] + # sorting by abs does not seem very reliable if two distinct eigenvalues are close + # in absolute value, so we perform a second sort afterwards using the real part + @test D3[1:l3] ≊ sort(D; by=abs, rev=true)[1:l3] + + @test eltype(D1) == T + @test eltype(D2) == T + @test eltype(D3) == T + + U1 = stack(V1) + U2 = stack(V2) + U3 = stack(V3) + R1 = stack(info1.residual) + R2 = stack(info2.residual) + R3 = stack(info3.residual) + @test A * U1 + B * conj(U1) ≈ U1 * Diagonal(D1) + R1 + @test A * U2 + B * conj(U2) ≈ U2 * Diagonal(D2) + R2 + @test A * U3 + B * conj(U3) ≈ U3 * Diagonal(D3) + R3 + end + end + end +end diff --git a/test/expintegrator.jl b/test/expintegrator.jl index 
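# Aside: the mode == :vector branch above uses the standard splitting of a
# real-linear operator on R^{2N} ≅ C^N into a complex-linear part A and an antilinear
# part B, so that the operator acts as v ↦ A * v + B * conj(v); buildrealmap (defined
# in the shared test setup, not shown in this diff) is assumed to return exactly that
# map. The splitting identity can be checked directly (a sketch):
using LinearAlgebra
Nr = 4
Ar = randn(2Nr, 2Nr)                      # arbitrary real-linear operator on R^{2N}
Jr = [zeros(Nr, Nr) -I; I zeros(Nr, Nr)]  # multiplication by im in real coordinates
Ar1 = (Ar - Jr * Ar * Jr) / 2             # commutes with Jr: complex-linear part
Ar2 = (Ar + Jr * Ar * Jr) / 2             # anticommutes with Jr: antilinear part
Ac = complex.(Ar1[1:Nr, 1:Nr], -Ar1[1:Nr, (Nr + 1):end])
Bc = complex.(Ar2[1:Nr, 1:Nr], +Ar2[1:Nr, (Nr + 1):end])
vc = randn(ComplexF64, Nr)
wr = Ar * [real(vc); imag(vc)]
@assert complex.(wr[1:Nr], wr[(Nr + 1):end]) ≈ Ac * vc + Bc * conj(vc)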
fd48972e..f59b72f3 100644 --- a/test/expintegrator.jl +++ b/test/expintegrator.jl @@ -25,17 +25,16 @@ end alg = Lanczos(; orth=orth, krylovdim=n, maxiter=2, tol=tolerance(T), verbosity=2) for k in 1:n - W[:, k] = unwrapvec(first(@constinferred exponentiate(wrapop(A, Val(mode)), - 1, - wrapvec(view(V, :, k), - Val(mode)), - alg))) + w, = @test_logs (:info,) exponentiate(wrapop(A, Val(mode)), 1, + wrapvec(view(V, :, k), + Val(mode)), alg) + W[:, k] = unwrapvec(w) end @test W ≈ exp(A) pmax = 5 alg = Lanczos(; orth=orth, krylovdim=n, maxiter=2, tol=tolerance(T), - verbosity=1) + verbosity=0) for t in (rand(real(T)), -rand(real(T)), im * randn(real(T)), randn(real(T)) + im * randn(real(T))) for p in 1:pmax @@ -66,17 +65,16 @@ end alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=2, tol=tolerance(T), verbosity=2) for k in 1:n - W[:, k] = unwrapvec(first(@constinferred exponentiate(wrapop(A, Val(mode)), - 1, - wrapvec(view(V, :, k), - Val(mode)), - alg))) + w, = @test_logs (:info,) exponentiate(wrapop(A, Val(mode)), 1, + wrapvec(view(V, :, k), + Val(mode)), alg) + W[:, k] = unwrapvec(w) end @test W ≈ exp(A) pmax = 5 alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=2, tol=tolerance(T), - verbosity=1) + verbosity=0) for t in (rand(real(T)), -rand(real(T)), im * randn(real(T)), randn(real(T)) + im * randn(real(T))) for p in 1:pmax @@ -102,10 +100,8 @@ end orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) @testset for T in scalartypes @testset for orth in orths - A = rand(T, (N, N)) .- one(T) / 2 + A = (1 // 2) .* (rand(T, (N, N)) .- one(T) / 2) A = (A + A') / 2 - s = norm(eigvals(A), 1) - rmul!(A, 1 / (10 * s)) pmax = 5 for t in (rand(real(T)), -rand(real(T)), im * randn(real(T)), randn(real(T)) + im * randn(real(T))) @@ -115,7 +111,7 @@ end wrapvec.(u, Ref(Val(mode)))...; maxiter=100, krylovdim=n, eager=true) - @assert info.converged > 0 + @test info.converged > 0 w2 = exp(t * A) * u[1] for j in 1:p w2 .+= t^j * ϕ(t * A, u[j + 1], j) @@ -139,9 +135,7 @@ end orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) @testset for T in scalartypes @testset for orth in orths - A = rand(T, (N, N)) .- one(T) / 2 - s = norm(eigvals(A), 1) - rmul!(A, 1 / (10 * s)) + A = (1 // 2) .* (rand(T, (N, N)) .- one(T) / 2) pmax = 5 for t in (rand(real(T)), -rand(real(T)), im * randn(real(T)), randn(real(T)) + im * randn(real(T))) @@ -156,12 +150,6 @@ end for j in 1:p w2 .+= t^j * ϕ(t * A, u[j + 1], j) end - @test w2 ≈ unwrapvec(w1) - w1, info = @constinferred expintegrator(wrapop(A, Val(mode)), t, - wrapvec.(u, Ref(Val(mode)))...; - maxiter=100, krylovdim=n, - tol=1e-3, eager=true) - @test unwrapvec(w1) ≈ w2 atol = 1e-2 * abs(t) end end end @@ -170,7 +158,7 @@ end @testset "Arnoldi - expintegrator fixed point branch" begin @testset for T in (ComplexF32, ComplexF64) # less probable that :LR eig is degenerate - A = rand(T, (N, N)) + A = rand(T, (N, N)) / 10 v₀ = rand(T, N) λs, vs, infoR = eigsolve(A, v₀, 1, :LR) @test infoR.converged > 0 diff --git a/test/factorize.jl b/test/factorize.jl index 467b195a..078857b3 100644 --- a/test/factorize.jl +++ b/test/factorize.jl @@ -3,6 +3,7 @@ scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : (ComplexF64,) orths = mode === :vector ? 
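# Aside: the ϕ helper used above lives in the shared test setup; mathematically,
# expintegrator returns w(t) = ϕ₀(tA) u₁ + Σⱼ tʲ ϕⱼ(tA) u_{j+1}, with ϕ₀(z) = exp(z)
# and the recurrence ϕⱼ(z) = (ϕⱼ₋₁(z) - 1/(j-1)!)/z, which is what the w2 reference
# value reconstructs. For a small dense invertible A the recurrence can be evaluated
# directly (a sketch; a series-based ϕ, as presumably used in the test setup, also
# covers singular A):
using LinearAlgebra
function ϕdense(A, v, j)
    Φ = exp(A)                             # ϕ₀(A)
    for k in 1:j
        Φ = A \ (Φ - I / factorial(k - 1)) # ϕₖ(A) from ϕₖ₋₁(A)
    end
    return Φ * v
end
Ad = randn(5, 5) / 5
@assert ϕdense(Ad, ones(5), 1) ≈ (exp(Ad) - I) * (Ad \ ones(5))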
(cgs2, mgs2, cgsr, mgsr) : (cgs2,) + using KrylovKit: EACHITERATION_LEVEL @testset for T in scalartypes @testset for orth in orths # tests fail miserably for cgs and mgs @@ -10,22 +11,56 @@ v = rand(T, (n,)) A = (A + A') iter = LanczosIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), orth) - verbosity = 1 - fact = @constinferred initialize(iter; verbosity=verbosity) + fact = @constinferred initialize(iter) + @constinferred expand!(iter, fact) + @test_logs initialize(iter; verbosity=EACHITERATION_LEVEL) + @test_logs (:info,) initialize(iter; verbosity=EACHITERATION_LEVEL + 1) + verbosity = EACHITERATION_LEVEL + 1 while length(fact) < n - @constinferred expand!(iter, fact; verbosity=verbosity) - verbosity = 0 + if verbosity == EACHITERATION_LEVEL + 1 + @test_logs (:info,) expand!(iter, fact; verbosity=verbosity) + verbosity = EACHITERATION_LEVEL + else + @test_logs expand!(iter, fact; verbosity=verbosity) + verbosity = EACHITERATION_LEVEL + 1 + end end - V = stack(unwrapvec, basis(fact)) H = rayleighquotient(fact) @test normres(fact) < 10 * n * eps(real(T)) @test V' * V ≈ I @test A * V ≈ V * H - @constinferred initialize!(iter, deepcopy(fact); verbosity=1) states = collect(Iterators.take(iter, n)) # collect tests size and eltype? @test rayleighquotient(last(states)) ≈ H + + @constinferred shrink!(fact, n - 1) + @test_logs (:info,) shrink!(fact, n - 2; verbosity=EACHITERATION_LEVEL + 1) + @test_logs shrink!(fact, n - 3; verbosity=EACHITERATION_LEVEL) + @constinferred initialize!(iter, deepcopy(fact)) + @test_logs initialize!(iter, deepcopy(fact); verbosity=EACHITERATION_LEVEL) + @test_logs (:info,) initialize!(iter, deepcopy(fact); + verbosity=EACHITERATION_LEVEL + 1) + + if T <: Complex + A = rand(T, (n, n)) # test warnings for non-hermitian matrices + v = rand(T, (n,)) + iter = LanczosIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), orth) + fact = @constinferred initialize(iter; verbosity=0) + @constinferred expand!(iter, fact; verbosity=0) + @test_logs initialize(iter; verbosity=0) + @test_logs (:warn,) initialize(iter) + verbosity = 1 + while length(fact) < n + if verbosity == 1 + @test_logs (:warn,) expand!(iter, fact; verbosity=verbosity) + verbosity = 0 + else + @test_logs expand!(iter, fact; verbosity=verbosity) + verbosity = 1 + end + end + end end end end @@ -41,13 +76,20 @@ end A = rand(T, (n, n)) v = rand(T, (n,)) iter = ArnoldiIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), orth) - verbosity = 1 - fact = @constinferred initialize(iter; verbosity=verbosity) + fact = @constinferred initialize(iter) + @constinferred expand!(iter, fact) + @test_logs initialize(iter; verbosity=EACHITERATION_LEVEL) + @test_logs (:info,) initialize(iter; verbosity=EACHITERATION_LEVEL + 1) + verbosity = EACHITERATION_LEVEL + 1 while length(fact) < n - @constinferred expand!(iter, fact; verbosity=verbosity) - verbosity = 0 + if verbosity == EACHITERATION_LEVEL + 1 + @test_logs (:info,) expand!(iter, fact; verbosity=verbosity) + verbosity = EACHITERATION_LEVEL + else + @test_logs expand!(iter, fact; verbosity=verbosity) + verbosity = EACHITERATION_LEVEL + 1 + end end - V = stack(unwrapvec, basis(fact)) H = rayleighquotient(fact) factor = (orth == cgs || orth == mgs ? 250 : 10) @@ -55,9 +97,16 @@ end @test V' * V ≈ I @test A * V ≈ V * H - @constinferred initialize!(iter, deepcopy(fact); verbosity=1) states = collect(Iterators.take(iter, n)) # collect tests size and eltype? 
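# Aside: these factorization tests drive the iterator interface by hand: initialize
# builds a length-1 factorization, expand! grows it one Krylov vector at a time, and
# shrink! truncates it, with basis, rayleighquotient and normres exposing the pieces.
# A minimal stand-alone run (a sketch):
using KrylovKit, LinearAlgebra
As = Symmetric(randn(20, 20))
iter_demo = LanczosIterator(x -> As * x, normalize(randn(20)))
fact_demo = initialize(iter_demo)
while length(fact_demo) < 5
    expand!(iter_demo, fact_demo)
end
Vd = hcat(basis(fact_demo)...)
Hd = rayleighquotient(fact_demo)
@assert Vd' * Vd ≈ I                                  # orthonormal Krylov basis
@assert norm(As * Vd - Vd * Hd) ≈ normres(fact_demo)  # residual sits in the last column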
@test rayleighquotient(last(states)) ≈ H + + @constinferred shrink!(fact, n - 1) + @test_logs (:info,) shrink!(fact, n - 2; verbosity=EACHITERATION_LEVEL + 1) + @test_logs shrink!(fact, n - 3; verbosity=EACHITERATION_LEVEL) + @constinferred initialize!(iter, deepcopy(fact)) + @test_logs initialize!(iter, deepcopy(fact); verbosity=EACHITERATION_LEVEL) + @test_logs (:info,) initialize!(iter, deepcopy(fact); + verbosity=EACHITERATION_LEVEL + 1) end end end @@ -149,3 +198,101 @@ end end end end + +# Test complete Golub-Kahan-Lanczos factorization +@testset "Complete Golub-Kahan-Lanczos factorization ($mode)" for mode in + (:vector, :inplace, + :outplace, :mixed) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths + A = rand(T, (n, n)) + v = A * rand(T, (n,)) # ensure v is in column space of A + iter = GKLIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), orth) + fact = @constinferred initialize(iter) + @constinferred expand!(iter, fact) + @test_logs initialize(iter; verbosity=EACHITERATION_LEVEL) + @test_logs (:info,) initialize(iter; verbosity=EACHITERATION_LEVEL + 1) + verbosity = EACHITERATION_LEVEL + 1 + while length(fact) < n + if verbosity == EACHITERATION_LEVEL + 1 + @test_logs (:info,) expand!(iter, fact; verbosity=verbosity) + verbosity = EACHITERATION_LEVEL + else + @test_logs expand!(iter, fact; verbosity=verbosity) + verbosity = EACHITERATION_LEVEL + 1 + end + end + U = stack(unwrapvec, basis(fact, Val(:U))) + V = stack(unwrapvec, basis(fact, Val(:V))) + B = rayleighquotient(fact) + @test normres(fact) < 10 * n * eps(real(T)) + @test U' * U ≈ I + @test V' * V ≈ I + @test A * V ≈ U * B + @test A' * U ≈ V * B' + + states = collect(Iterators.take(iter, n)) # collect tests size and eltype? + @test rayleighquotient(last(states)) ≈ B + + @constinferred shrink!(fact, n - 1) + @test_logs (:info,) shrink!(fact, n - 2; verbosity=EACHITERATION_LEVEL + 1) + @test_logs shrink!(fact, n - 3; verbosity=EACHITERATION_LEVEL) + @constinferred initialize!(iter, deepcopy(fact)) + @test_logs initialize!(iter, deepcopy(fact); verbosity=EACHITERATION_LEVEL) + @test_logs (:info,) initialize!(iter, deepcopy(fact); + verbosity=EACHITERATION_LEVEL + 1) + end + end +end + +# Test incomplete Golub-Kahan-Lanczos factorization +@testset "Incomplete Golub-Kahan-Lanczos factorization ($mode)" for mode in + (:vector, :inplace, + :outplace, :mixed) + scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + orths = mode === :vector ? 
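# Aside: the Golub-Kahan-Lanczos relations checked above, A * V == U * B and
# A' * U == V * B' with orthonormal U, V and bidiagonal B, are the two-sided analogue
# of the Lanczos relation; in the fully converged limit they reduce to the SVD.
# Dense statement of that limit (a sketch):
using LinearAlgebra
Ad = randn(6, 6)
F = svd(Ad)
@assert Ad * F.V ≈ F.U * Diagonal(F.S)
@assert Ad' * F.U ≈ F.V * Diagonal(F.S)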
(cgs2, mgs2, cgsr, mgsr) : (mgsr,) + @testset for T in scalartypes + @testset for orth in orths + if T == Complex{Int} + A = rand(-100:100, (N, N)) + im * rand(-100:100, (N, N)) + v = rand(-100:100, (N,)) + else + A = rand(T, (N, N)) + v = rand(T, (N,)) + end + iter = @constinferred GKLIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), + orth) + krylovdim = 3 * n + fact = @constinferred initialize(iter) + while normres(fact) > eps(float(real(T))) && length(fact) < krylovdim + @constinferred expand!(iter, fact) + Ũ, Ṽ, B, r̃, β, e = fact + U = stack(unwrapvec, Ũ) + V = stack(unwrapvec, Ṽ) + r = unwrapvec(r̃) + @test U' * U ≈ I + @test V' * V ≈ I + @test norm(r) ≈ β + @test A * V ≈ U * B + r * e' + @test A' * U ≈ V * B' + end + + fact = @constinferred shrink!(fact, div(n, 2)) + U = stack(unwrapvec, @constinferred basis(fact, Val(:U))) + V = stack(unwrapvec, @constinferred basis(fact, Val(:V))) + B = @constinferred rayleighquotient(fact) + r = unwrapvec(@constinferred residual(fact)) + β = @constinferred normres(fact) + e = @constinferred rayleighextension(fact) + @test U' * U ≈ I + @test V' * V ≈ I + @test norm(r) ≈ β + @test A * V ≈ U * B + r * e' + @test A' * U ≈ V * B' + end + end +end diff --git a/test/geneigsolve.jl b/test/geneigsolve.jl index d154b81a..17e2d439 100644 --- a/test/geneigsolve.jl +++ b/test/geneigsolve.jl @@ -19,7 +19,49 @@ n1, :SR; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T), ishermitian=true, isposdef=true, - verbosity=2) + verbosity=0) + + if info.converged < n1 + @test_logs geneigsolve((wrapop(A, Val(mode)), + wrapop(B, Val(mode))), + wrapvec(v, Val(mode)), + n1, :SR; orth=orth, krylovdim=n, + maxiter=1, tol=tolerance(T), + ishermitian=true, isposdef=true, + verbosity=0) + @test_logs geneigsolve((wrapop(A, Val(mode)), + wrapop(B, Val(mode))), + wrapvec(v, Val(mode)), + n1, :SR; orth=orth, krylovdim=n, + maxiter=1, tol=tolerance(T), + ishermitian=true, isposdef=true, + verbosity=1) + @test_logs (:warn,) geneigsolve((wrapop(A, Val(mode)), + wrapop(B, Val(mode))), + wrapvec(v, Val(mode)), + n1, :SR; orth=orth, krylovdim=n1 + 1, + maxiter=1, tol=tolerance(T), + ishermitian=true, isposdef=true, + verbosity=1) + @test_logs (:info,) geneigsolve((wrapop(A, Val(mode)), + wrapop(B, Val(mode))), + wrapvec(v, Val(mode)), + n1, :SR; orth=orth, krylovdim=n, + maxiter=1, tol=tolerance(T), + ishermitian=true, isposdef=true, + verbosity=2) + alg = GolubYe(; orth=orth, krylovdim=n1, maxiter=3, tol=tolerance(T), + verbosity=3) + @test_logs((:info,), (:info,), (:info,), (:warn,), + geneigsolve((wrapop(A, Val(mode)), wrapop(B, Val(mode))), + wrapvec(v, Val(mode)), 1, :SR, alg)) + alg = GolubYe(; orth=orth, krylovdim=3, maxiter=2, tol=tolerance(T), + verbosity=4) + @test_logs((:info,), (:info,), (:info,), (:info,), + (:info,), (:info,), (:info,), (:info,), (:warn,), + geneigsolve((wrapop(A, Val(mode)), wrapop(B, Val(mode))), + wrapvec(v, Val(mode)), 1, :SR, alg)) + end @test KrylovKit.geneigselector((wrapop(A, Val(mode)), wrapop(B, Val(mode))), scalartype(v); orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T), ishermitian=true, @@ -55,7 +97,7 @@ end B = sqrt(B * B') v = rand(T, (N,)) alg = GolubYe(; orth=orth, krylovdim=3 * n, maxiter=100, - tol=cond(B) * tolerance(T)) + tol=cond(B) * tolerance(T), verbosity=0) D1, V1, info1 = @constinferred geneigsolve((wrapop(A, Val(mode)), wrapop(B, Val(mode))), wrapvec(v, Val(mode)), diff --git a/test/gklfactorize.jl b/test/gklfactorize.jl deleted file mode 100644 index 471e01b5..00000000 --- a/test/gklfactorize.jl +++ /dev/null @@ -1,83 
+0,0 @@ -# Test complete Golub-Kahan-Lanczos factorization -@testset "Complete Golub-Kahan-Lanczos factorization ($mode)" for mode in - (:vector, :inplace, - :outplace, :mixed) - scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : - (ComplexF64,) - orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) - @testset for T in scalartypes - @testset for orth in orths - A = rand(T, (n, n)) - v = A * rand(T, (n,)) # ensure v is in column space of A - iter = GKLIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), orth) - verbosity = 1 - fact = @constinferred initialize(iter; verbosity=verbosity) - while length(fact) < n - @constinferred expand!(iter, fact; verbosity=verbosity) - verbosity = 0 - end - - U = stack(unwrapvec, basis(fact, :U)) - V = stack(unwrapvec, basis(fact, :V)) - B = rayleighquotient(fact) - @test normres(fact) < 10 * n * eps(real(T)) - @test U' * U ≈ I - @test V' * V ≈ I - @test A * V ≈ U * B - @test A' * U ≈ V * B' - - @constinferred initialize!(iter, deepcopy(fact); verbosity=1) - states = collect(Iterators.take(iter, n)) # collect tests size and eltype? - @test rayleighquotient(last(states)) ≈ B - end - end -end - -# Test incomplete Golub-Kahan-Lanczos factorization -@testset "Incomplete Golub-Kahan-Lanczos factorization ($mode)" for mode in - (:vector, :inplace, - :outplace, :mixed) - scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : - (ComplexF64,) - orths = mode === :vector ? (cgs2, mgs2, cgsr, mgsr) : (mgsr,) - @testset for T in scalartypes - @testset for orth in orths - if T == Complex{Int} - A = rand(-100:100, (N, N)) + im * rand(-100:100, (N, N)) - v = rand(-100:100, (N,)) - else - A = rand(T, (N, N)) - v = rand(T, (N,)) - end - iter = @constinferred GKLIterator(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), - orth) - krylovdim = 3 * n - fact = @constinferred initialize(iter) - while normres(fact) > eps(float(real(T))) && length(fact) < krylovdim - @constinferred expand!(iter, fact) - Ũ, Ṽ, B, r̃, β, e = fact - U = stack(unwrapvec, Ũ) - V = stack(unwrapvec, Ṽ) - r = unwrapvec(r̃) - @test U' * U ≈ I - @test V' * V ≈ I - @test norm(r) ≈ β - @test A * V ≈ U * B + r * e' - @test A' * U ≈ V * B' - end - - fact = @constinferred shrink!(fact, div(n, 2)) - U = stack(unwrapvec, @constinferred basis(fact, :U)) - V = stack(unwrapvec, @constinferred basis(fact, :V)) - B = @constinferred rayleighquotient(fact) - r = unwrapvec(@constinferred residual(fact)) - β = @constinferred normres(fact) - e = @constinferred rayleighextension(fact) - @test U' * U ≈ I - @test V' * V ≈ I - @test norm(r) ≈ β - @test A * V ≈ U * B + r * e' - @test A' * U ≈ V * B' - end - end -end diff --git a/test/issues.jl b/test/issues.jl new file mode 100644 index 00000000..7efc7876 --- /dev/null +++ b/test/issues.jl @@ -0,0 +1,19 @@ +# https://github.com/Jutho/KrylovKit.jl/issues/100 +@testset "Issue #100" begin + N = 32 # needs to be large enough to trigger shrinking + A = rand(N, N) + A += A' + v₀ = [rand(N ÷ 2), rand(N ÷ 2)] + + vals, vecs, = eigsolve(v₀, 4, :LM; ishermitian=true) do v + v′ = vcat(v...) 
+ y = A * v′ + return [y[1:(N ÷ 2)], y[(N ÷ 2 + 1):end]] + end + + vals2, vecs2, = eigsolve(A, 4, :LM; ishermitian=true) + @test vals ≈ vals2 + for (v, v′) in zip(vecs, vecs2) + @test abs(inner(vcat(v...), v′)) ≈ 1 + end +end diff --git a/test/linsolve.jl b/test/linsolve.jl index 22460113..d2f4e96e 100644 --- a/test/linsolve.jl +++ b/test/linsolve.jl @@ -6,17 +6,40 @@ A = rand(T, (n, n)) A = sqrt(A * A') b = rand(T, n) - alg = CG(; maxiter=2n, tol=tolerance(T) * norm(b), verbosity=2) # because of loss of orthogonality, we choose maxiter = 2n + alg = CG(; maxiter=2n, tol=tolerance(T) * norm(b), verbosity=0) # because of loss of orthogonality, we choose maxiter = 2n x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); ishermitian=true, isposdef=true, maxiter=2n, krylovdim=1, rtol=tolerance(T), - verbosity=1) + verbosity=0) @test info.converged > 0 @test unwrapvec(b) ≈ A * unwrapvec(x) - x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x; - ishermitian=true, isposdef=true, maxiter=2n, - krylovdim=1, rtol=tolerance(T)) + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + ishermitian=true, isposdef=true, maxiter=2n, + krylovdim=1, rtol=tolerance(T), + verbosity=0) + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + ishermitian=true, isposdef=true, maxiter=2n, + krylovdim=1, rtol=tolerance(T), + verbosity=1) + @test_logs (:info,) (:info,) linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + ishermitian=true, isposdef=true, maxiter=2n, + krylovdim=1, rtol=tolerance(T), + verbosity=2) + @test_logs min_level = Logging.Warn linsolve(wrapop(A, Val(mode)), + wrapvec(b, Val(mode)); + ishermitian=true, isposdef=true, + maxiter=2n, + krylovdim=1, rtol=tolerance(T), + verbosity=3) + + x, info = linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) @test info.numops == 1 + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) + alg = CG(; maxiter=2n, tol=tolerance(T) * norm(b), verbosity=1) + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) + alg = CG(; maxiter=2n, tol=tolerance(T) * norm(b), verbosity=2) + @test_logs (:info,) linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) + alg = CG(; maxiter=2n, tol=tolerance(T) * norm(b), verbosity=0) A = rand(T, (n, n)) A = sqrt(A * A') @@ -37,10 +60,27 @@ end A = rand(T, (N, N)) A = sqrt(sqrt(A * A')) / N b = rand(T, N) - x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + x₀ = rand(T, N) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(x₀, Val(mode)); isposdef=true, maxiter=1, krylovdim=N, rtol=tolerance(T)) @test unwrapvec(b) ≈ A * unwrapvec(x) + unwrapvec(info.residual) + if info.converged == 0 + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(x₀, Val(mode)); + isposdef=true, maxiter=1, krylovdim=N, + rtol=tolerance(T), verbosity=0) + @test_logs (:warn,) linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(x₀, Val(mode)); + isposdef=true, maxiter=1, krylovdim=N, + rtol=tolerance(T), verbosity=1) + @test_logs (:info,) (:warn,) linsolve(wrapop(A, Val(mode)), + wrapvec(b, Val(mode)), + wrapvec(x₀, Val(mode)); + isposdef=true, maxiter=1, krylovdim=N, + rtol=tolerance(T), verbosity=2) + end α₀ = rand(real(T)) + 1 α₁ = rand(real(T)) @@ -59,23 +99,60 @@ end @testset for T in scalartypes A = rand(T, (n, n)) .- one(T) / 2 b = rand(T, n) - alg = GMRES(; krylovdim=n, maxiter=2, tol=tolerance(T) * norm(b), 
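# Aside: CG applies only to Hermitian positive-definite problems, which is why the
# tests manufacture the operator as A = sqrt(A * A'): the Gram matrix A * A' is
# positive semidefinite (almost surely definite for dense random A), and its
# principal square root inherits that. A direct check (a sketch):
using LinearAlgebra
A0 = randn(5, 5)
Apd = sqrt(A0 * A0')
@assert isposdef(Hermitian(Apd))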
verbosity=2) + alg = GMRES(; krylovdim=n, maxiter=2, tol=tolerance(T) * norm(b), verbosity=0) x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); krylovdim=n, maxiter=2, - rtol=tolerance(T), verbosity=1) - @test info.converged > 0 + rtol=tolerance(T), verbosity=0) + @test info.converged == 1 @test unwrapvec(b) ≈ A * unwrapvec(x) - x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x; - krylovdim=n, maxiter=2, - rtol=tolerance(T)) + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + krylovdim=n, maxiter=2, + rtol=tolerance(T), verbosity=0) + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + krylovdim=n, maxiter=2, + rtol=tolerance(T), verbosity=1) + @test_logs (:info,) (:info,) linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + krylovdim=n, maxiter=2, + rtol=tolerance(T), verbosity=2) + @test_logs min_level = Logging.Warn linsolve(wrapop(A, Val(mode)), + wrapvec(b, Val(mode)); + krylovdim=n, maxiter=2, + rtol=tolerance(T), verbosity=3) + + alg = GMRES(; krylovdim=n, maxiter=2, tol=tolerance(T) * norm(b), verbosity=0) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, + alg) @test info.numops == 1 + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) + alg = GMRES(; krylovdim=n, maxiter=2, tol=tolerance(T) * norm(b), verbosity=1) + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) + alg = GMRES(; krylovdim=n, maxiter=2, tol=tolerance(T) * norm(b), verbosity=2) + @test_logs (:info,) linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) + alg = GMRES(; krylovdim=n, maxiter=2, tol=tolerance(T) * norm(b), verbosity=0) + + nreal = (T <: Real) ? n : 2n + algr = GMRES(; krylovdim=nreal, maxiter=2, tol=tolerance(T) * norm(b), verbosity=0) + xr, infor = @constinferred reallinsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + zerovector(x), algr) + @test infor.converged == 1 + @test unwrapvec(x) ≈ unwrapvec(xr) A = rand(T, (n, n)) α₀ = rand(T) α₁ = -rand(T) x, info = @constinferred(linsolve(A, b, zerovector(b), alg, α₀, α₁)) @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) - @test info.converged > 0 + @test info.converged == 1 + + if mode == :vector && T <: Complex + B = rand(T, (n, n)) + f = buildrealmap(A, B) + α₀ = rand(real(T)) + α₁ = -rand(real(T)) + xr, infor = @constinferred reallinsolve(f, b, zerovector(b), algr, α₀, α₁) + @test infor.converged == 1 + @test b ≈ (α₀ * xr + α₁ * A * xr + α₁ * B * conj(xr)) + end end end @@ -87,53 +164,148 @@ end A = rand(T, (N, N)) .- one(T) / 2 A = I - T(9 / 10) * A / maximum(abs, eigvals(A)) b = rand(T, N) - x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + x₀ = rand(T, N) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(x₀, Val(mode)); krylovdim=3 * n, maxiter=50, rtol=tolerance(T)) @test unwrapvec(b) ≈ A * unwrapvec(x) + unwrapvec(info.residual) + if info.converged == 0 + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(x₀, Val(mode)); + krylovdim=3 * n, + maxiter=50, rtol=tolerance(T), verbosity=0) + @test_logs (:warn,) linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(x₀, Val(mode)); + krylovdim=3 * n, + maxiter=50, rtol=tolerance(T), verbosity=1) + @test_logs (:info,) (:warn,) linsolve(wrapop(A, Val(mode)), + wrapvec(b, Val(mode)), + wrapvec(x₀, Val(mode)); + krylovdim=3 * n, + maxiter=50, rtol=tolerance(T), + verbosity=2) + end + + alg = GMRES(; krylovdim=3 * n, 
maxiter=50, tol=tolerance(T) * norm(b), verbosity=0) + xr, infor = @constinferred reallinsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + zerovector(x), alg) + @test unwrapvec(b) ≈ A * unwrapvec(xr) + unwrapvec(infor.residual) A = rand(T, (N, N)) .- one(T) / 2 α₀ = maximum(abs, eigvals(A)) - α₁ = -rand(T) - α₁ *= T(9) / T(10) / abs(α₁) + α₁ = -9 * rand(T) / 10 x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), α₀, α₁; krylovdim=3 * n, maxiter=50, rtol=tolerance(T)) @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + unwrapvec(info.residual) + + if mode == :vector && T <: Complex + A = rand(T, (N, N)) .- one(T) / 2 + B = rand(T, (N, N)) .- one(T) / 2 + f = buildrealmap(A, B) + α₀ = 1 + α₁ = -1 / (maximum(abs, eigvals(A)) + maximum(abs, eigvals(B))) + xr, infor = @constinferred reallinsolve(f, b, zerovector(b), alg, α₀, α₁) + @test b ≈ (α₀ * xr + α₁ * A * xr + α₁ * B * conj(xr)) + infor.residual + end end end -# Test BICGStab -@testset "BiCGStab ($mode)" for mode in (:vector, :inplace, :outplace) +# Test BiCGStab +@testset "BiCGStab small problem ($mode)" for mode in (:vector, :inplace, :outplace) scalartypes = mode === :vector ? (Float32, Float64, ComplexF32, ComplexF64) : (ComplexF64,) @testset for T in scalartypes A = rand(T, (n, n)) .- one(T) / 2 A = I - T(9 / 10) * A / maximum(abs, eigvals(A)) b = rand(T, n) - alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=2) + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=0) x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), wrapvec(zerovector(b), Val(mode)), alg) @test info.converged > 0 @test unwrapvec(b) ≈ A * unwrapvec(x) + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=0) + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg) + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=1) + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg) + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=2) + @test_logs (:info,) (:info,) linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg) + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=3) + @test_logs min_level = Logging.Warn linsolve(wrapop(A, Val(mode)), + wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg) + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=0) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) @test info.numops == 1 + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=0) + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=1) + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=2) + @test_logs (:info,) linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, alg) + alg = BiCGStab(; maxiter=4n, tol=tolerance(T) * norm(b), verbosity=0) + + α₀ = rand(real(T)) + 1 + α₁ = rand(real(T)) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg, α₀, α₁) + @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + @test info.converged > 0 + end +end +@testset "BiCGStab large problem ($mode)" for mode in (:vector, :inplace, :outplace) + scalartypes = mode === 
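# Aside: reallinsolve exists because a map like v ↦ α₀ * v + α₁ * (A * v + B * conj(v))
# is R-linear on C^N viewed as R^{2N} but, whenever B ≠ 0, not C-linear, so the
# ordinary complex Krylov machinery does not apply to it. The failure of C-linearity
# is easy to exhibit (a sketch):
Ac = randn(ComplexF64, 3, 3)
Bc = randn(ComplexF64, 3, 3)
g(v) = Ac * v + Bc * conj(v)
vc = randn(ComplexF64, 3)
@assert g(vc + vc) ≈ g(vc) + g(vc)    # additive and R-homogeneous
@assert !(g(im * vc) ≈ im * g(vc))    # not C-homogeneous: conj(im * v) == -im * conj(v)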
:vector ? (Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + @testset for T in scalartypes + A = rand(T, (N, N)) .- one(T) / 2 + b = rand(T, N) + α₀ = maximum(abs, eigvals(A)) + α₁ = -9 * rand(real(T)) / 10 + alg = BiCGStab(; maxiter=2, tol=tolerance(T) * norm(b), verbosity=0) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg, α₀, α₁) + @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + unwrapvec(info.residual) + if info.converged == 0 + @test_logs linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg, α₀, α₁) + alg = BiCGStab(; maxiter=2, tol=tolerance(T) * norm(b), verbosity=1) + @test_logs (:warn,) linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg, α₀, α₁) + alg = BiCGStab(; maxiter=2, tol=tolerance(T) * norm(b), verbosity=2) + @test_logs (:info,) (:warn,) linsolve(wrapop(A, Val(mode)), + wrapvec(b, Val(mode)), + wrapvec(zerovector(b), Val(mode)), alg, + α₀, α₁) + end + alg = BiCGStab(; maxiter=10 * N, tol=tolerance(T) * norm(b), verbosity=0) + x, info = @constinferred linsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), x, + alg, α₀, α₁) + @test info.converged > 0 + @test unwrapvec(b) ≈ (α₀ * I + α₁ * A) * unwrapvec(x) + + xr, infor = @constinferred reallinsolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + zerovector(x), alg, α₀, α₁) + @test infor.converged > 0 + @test unwrapvec(xr) ≈ unwrapvec(x) + + if mode == :vector && T <: Complex + A = rand(T, (N, N)) .- one(T) / 2 + B = rand(T, (N, N)) .- one(T) / 2 + f = buildrealmap(A, B) + α₀ = 1 + α₁ = -1 / (maximum(abs, eigvals(A)) + maximum(abs, eigvals(B))) + xr, infor = @constinferred reallinsolve(f, b, zerovector(b), alg, α₀, α₁) + @test infor.converged > 0 + @test b ≈ (α₀ * xr + α₁ * A * xr + α₁ * B * conj(xr)) + end + end +end diff --git a/test/lssolve.jl b/test/lssolve.jl new file mode 100644 index 00000000..c91f3371 --- /dev/null +++ b/test/lssolve.jl @@ -0,0 +1,96 @@ +# Test LSMR complete +@testset "LSMR small problem ($mode)" for mode in (:vector, :inplace, :outplace, :mixed) + scalartypes = mode === :vector ? 
(Float32, Float64, ComplexF32, ComplexF64) : + (ComplexF64,) + @testset for T in scalartypes + A = rand(T, (2 * n, n)) + U, S, V = svd(A) + invS = 1 ./ S + S[end] = 0 # make rank deficient + invS[end] = 0 # choose minimal norm solution + A = U * Diagonal(S) * V' + + b = rand(T, 2 * n) + tol = 10 * n * eps(real(T)) + x, info = @constinferred lssolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + maxiter=3, krylovdim=1, verbosity=0) # no reorthogonalization + r = b - A * unwrapvec(x) + @test unwrapvec(info.residual) ≈ r + @test info.normres ≈ norm(A' * r) + @test info.converged == 0 + @test_logs lssolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); maxiter=3, + verbosity=0) + @test_logs (:warn,) lssolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); maxiter=3, + verbosity=1) + @test_logs (:info,) (:warn,) lssolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)); + maxiter=3, verbosity=2) + + alg = LSMR(; maxiter=n, tol=tol, verbosity=0, krylovdim=n) + # reorthogonalisation is essential here to converge in exactly n iterations + x, info = @constinferred lssolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), alg) + + @test info.converged > 0 + @test abs(inner(V[:, end], unwrapvec(x))) < alg.tol + @test unwrapvec(x) ≈ V * Diagonal(invS) * U' * b + @test_logs lssolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), alg) + alg = LSMR(; maxiter=2 * n, tol=tol, verbosity=1) + @test_logs lssolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), alg) + alg = LSMR(; maxiter=2 * n, tol=tol, verbosity=2) + @test_logs (:info,) (:info,) lssolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), + alg) + alg = LSMR(; maxiter=2 * n, tol=tol, verbosity=3) + @test_logs min_level = Logging.Warn lssolve(wrapop(A, Val(mode)), + wrapvec(b, Val(mode)), + alg) + + λ = rand(real(T)) + alg = LSMR(; maxiter=n, tol=tol, verbosity=0, krylovdim=n) + # reorthogonalisation is essential here to converge in exactly n iterations + x, info = @constinferred lssolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode)), alg, + λ) + + r = b - A * unwrapvec(x) + @test info.converged > 0 + @test A' * r ≈ λ^2 * unwrapvec(x) atol = 2 * tol + + if mode == :vector && T <: Complex + A = rand(T, (2 * n, n)) .- one(T) / 2 + B = rand(T, (2 * n, n)) .- one(T) / 2 + f = buildrealmap(A, B) + # the effective linear problem has twice the size, so 4n x 2n + alg = LSMR(; maxiter=2 * n, tol=tol, verbosity=0, krylovdim=2 * n) + xr, infor = @constinferred reallssolve(f, b, alg) + @test infor.converged > 0 + y = (A * xr + B * conj(xr)) + @test b ≈ y + infor.residual + @test (A' * b + conj(B' * b)) ≈ (A' * y + conj(B' * y)) + end + end +end +@testset "LSMR large problem ($mode)" for mode in (:vector, :inplace, :outplace, :mixed) + scalartypes = mode === :vector ? 
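# Aside: the identity A' * r ≈ λ^2 * x tested above characterizes the damped
# least-squares problem LSMR solves: minimizing norm(A*x - b)^2 + λ^2 * norm(x)^2
# gives the normal equations (A'A + λ^2 I) x = A'b, i.e. A' * (b - A*x) = λ^2 * x.
# Dense confirmation of that algebra (a sketch):
using LinearAlgebra
Ad = randn(8, 4); bd = randn(8); λd = 0.3
xd = (Ad' * Ad + λd^2 * I) \ (Ad' * bd)
@assert Ad' * (bd - Ad * xd) ≈ λd^2 * xd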
+                  (Float64, ComplexF64) : (ComplexF64,)
+    @testset for T in scalartypes
+        A = rand(T, (2 * N, N)) .- (one(T) / 2)
+        b = rand(T, 2 * N) .- (one(T) / 2)
+
+        tol = 10 * N * eps(real(T))
+        x, info = @constinferred lssolve(wrapop(A, Val(mode)), wrapvec(b, Val(mode));
+                                         maxiter=N, tol=tol, verbosity=0, krylovdim=5)
+
+        r = b - A * unwrapvec(x)
+        @test info.converged > 0
+        @test norm(A' * r) < 5 * tol # there seems to be some loss of precision in the computation of the convergence measure
+
+        if mode == :vector && T <: Complex
+            A = rand(T, (2 * N, N)) .- one(T) / 2
+            B = rand(T, (2 * N, N)) .- one(T) / 2
+            f = buildrealmap(A, B)
+            alg = LSMR(; maxiter=N, tol=tol, verbosity=0, krylovdim=5)
+            xr, infor = @constinferred reallssolve(f, b, alg)
+            @test infor.converged > 0
+            y = (A * xr + B * conj(xr))
+            @test b ≈ y + infor.residual
+            @test (A' * b + conj(B' * b)) ≈ (A' * y + conj(B' * y))
+        end
+    end
+end
diff --git a/test/recursivevec.jl b/test/nestedtuple.jl
similarity index 83%
rename from test/recursivevec.jl
rename to test/nestedtuple.jl
index 08d5a4c3..43d981b6 100644
--- a/test/recursivevec.jl
+++ b/test/nestedtuple.jl
@@ -1,15 +1,16 @@
-@testset "RecursiveVec - singular values full" begin
+# TODO: simply use tuples once RecursiveVec is removed from the package.
+@testset "Nested tuples - singular values full" begin
     @testset for T in (Float32, Float64, ComplexF32, ComplexF64)
         @testset for orth in (cgs2, mgs2, cgsr, mgsr)
             A = rand(T, (n, n))
             v = rand(T, (n,))
-            v2 = RecursiveVec(v, zero(v))
+            v2 = (v, zero(v))
             alg = Lanczos(; orth=orth, krylovdim=2 * n, maxiter=1, tol=tolerance(T))
             D, V, info = eigsolve(v2, n, :LR, alg) do x
                 x1, x2 = x
                 y1 = A * x2
                 y2 = A' * x1
-                return RecursiveVec(y1, y2)
+                return (y1, y2)
             end
             @test info.converged >= n
             S = D[1:n]
@@ -21,20 +22,20 @@
     end
 end

-@testset "RecursiveVec - singular values iteratively" begin
+@testset "Nested tuples - singular values iteratively" begin
     @testset for T in (Float32, Float64, ComplexF32, ComplexF64)
         @testset for orth in (cgs2, mgs2, cgsr, mgsr)
             A = rand(T, (N, 2 * N))
             v = rand(T, (N,))
             w = rand(T, (2 * N,))
-            v2 = RecursiveVec(v, w)
+            v2 = (v, w)
             alg = Lanczos(; orth=orth, krylovdim=n, maxiter=300, tol=tolerance(T))
             n1 = div(n, 2)
             D, V, info = eigsolve(v2, n1, :LR, alg) do x
                 x1, x2 = x
                 y1 = A * x2
                 y2 = A' * x1
-                return RecursiveVec(y1, y2)
+                return (y1, y2)
             end
             @test info.converged >= n1
             S = D[1:n1]
diff --git a/test/runtests.jl b/test/runtests.jl
index 16a35c31..3958786c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,7 +1,7 @@
 using Random
 Random.seed!(76543210)

-using Test, TestExtras
+using Test, TestExtras, Logging
 using LinearAlgebra
 using KrylovKit
 using VectorInterface
@@ -25,22 +25,50 @@ const mgsr = ModifiedGramSchmidtIR(η₀)
 # Tests
 # -----
 t = time()
-include("factorize.jl")
-include("gklfactorize.jl")
-
-include("linsolve.jl")
-include("eigsolve.jl")
-include("schursolve.jl")
-include("geneigsolve.jl")
-include("svdsolve.jl")
-include("expintegrator.jl")
-
-include("linalg.jl")
-include("recursivevec.jl")
-
-include("ad.jl")
+@testset "Krylov factorisations" verbose = true begin
+    include("factorize.jl")
+end
+@testset "Linear problems with linsolve" verbose = true begin
+    include("linsolve.jl")
+end
+@testset "Least squares problems with lssolve" verbose = true begin
+    include("lssolve.jl")
+end
+@testset "Eigenvalue problems with eigsolve" verbose = true begin
+    include("eigsolve.jl")
+    include("schursolve.jl")
+    include("geneigsolve.jl")
+end
+@testset "Singular value problems with svdsolve" verbose = true begin
+    include("svdsolve.jl")
+end
+@testset "Exponentiate and exponential integrator" verbose = true begin
+    include("expintegrator.jl")
+end
+@testset "Linear Algebra Utilities" verbose = true begin
+    include("linalg.jl")
+end
+@testset "Singular value problems via eigsolve with nested tuples" verbose = true begin
+    include("nestedtuple.jl")
+end
+@testset "Linsolve differentiation rules" verbose = true begin
+    include("ad/linsolve.jl")
+end
+@testset "Eigsolve differentiation rules" verbose = true begin
+    include("ad/eigsolve.jl")
+    include("ad/degenerateeigsolve.jl")
+end
+@testset "Svdsolve differentiation rules" verbose = true begin
+    include("ad/svdsolve.jl")
+end
 t = time() - t
+
+# Issues
+# ------
+@testset "Known issues" verbose = true begin
+    include("issues.jl")
+end
 println("Tests finished in $t seconds")

 module AquaTests
diff --git a/test/schursolve.jl b/test/schursolve.jl
index aebd4184..57a8e6ae 100644
--- a/test/schursolve.jl
+++ b/test/schursolve.jl
@@ -11,6 +11,24 @@
             T1, V1, D1, info1 = @constinferred schursolve(wrapop(A, Val(mode)),
                                                           wrapvec(v, Val(mode)), n1, :SR,
                                                           alg)
+            @test_logs schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, :SR,
+                                  alg)
+            alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T),
+                          verbosity=1)
+            @test_logs schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1, :SR,
+                                  alg)
+            alg = Arnoldi(; orth=orth, krylovdim=n1 + 1, maxiter=1, tol=tolerance(T),
+                          verbosity=1)
+            @test_logs (:warn,) schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1,
+                                           :SR,
+                                           alg)
+            alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T),
+                          verbosity=2)
+            @test_logs (:info,) schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n1,
+                                           :SR,
+                                           alg)
+
+            alg = Arnoldi(; orth=orth, krylovdim=n, maxiter=1, tol=tolerance(T))
             n2 = n - n1
             T2, V2, D2, info2 = schursolve(wrapop(A, Val(mode)), wrapvec(v, Val(mode)), n2,
                                            :LR, alg)
@@ -57,7 +75,8 @@ end
         @testset for orth in orths
             A = rand(T, (N, N)) .- one(T) / 2
             v = rand(T, (N,))
-            alg = Arnoldi(; orth=orth, krylovdim=3 * n, maxiter=10, tol=tolerance(T))
+            alg = Arnoldi(; orth=orth, krylovdim=3 * n, maxiter=10, tol=tolerance(T),
+                          verbosity=0)
             T1, V1, D1, info1 = @constinferred schursolve(wrapop(A, Val(mode)),
                                                           wrapvec(v, Val(mode)), n, :SR,
                                                           alg)
diff --git a/test/svdsolve.jl b/test/svdsolve.jl
index bad51573..d9e3b98f 100644
--- a/test/svdsolve.jl
+++ b/test/svdsolve.jl
@@ -9,8 +9,32 @@
             S, lvecs, rvecs, info = @constinferred svdsolve(wrapop(A, Val(mode)),
                                                             wrapvec(A[:, 1], Val(mode)), n,
                                                             :LR, alg)
-            @test S ≈ svdvals(A)
+            @test info.converged == n
+
+            n1 = div(n, 2)
+            @test_logs svdsolve(wrapop(A, Val(mode)), wrapvec(A[:, 1], Val(mode)), n1, :LR,
+                                alg)
+            alg = GKL(; orth=orth, krylovdim=2 * n, maxiter=1, tol=tolerance(T),
+                      verbosity=1)
+            @test_logs svdsolve(wrapop(A, Val(mode)), wrapvec(A[:, 1], Val(mode)), n1, :LR,
+                                alg)
+            alg = GKL(; orth=orth, krylovdim=n1 + 1, maxiter=1, tol=tolerance(T),
+                      verbosity=1)
+            @test_logs (:warn,) svdsolve(wrapop(A, Val(mode)), wrapvec(A[:, 1], Val(mode)),
+                                         n1, :LR,
+                                         alg)
+            alg = GKL(; orth=orth, krylovdim=2 * n, maxiter=1, tol=tolerance(T),
+                      verbosity=2)
+            @test_logs (:info,) svdsolve(wrapop(A, Val(mode)), wrapvec(A[:, 1], Val(mode)),
+                                         n1, :LR,
+                                         alg)
+            alg = GKL(; orth=orth, krylovdim=2 * n, maxiter=1, tol=tolerance(T),
+                      verbosity=4)
+            @test_logs min_level = Logging.Warn svdsolve(wrapop(A, Val(mode)),
+                                                         wrapvec(A[:, 1], Val(mode)),
+                                                         n1, :LR,
+                                                         alg)

             U = stack(unwrapvec, lvecs)
             V = stack(unwrapvec, rvecs)
@@ -31,7 +55,8 @@ end
             A = rand(T, (2 * N, N))
             v = rand(T, (2 * N,))
             n₁ = div(n, 2)
-            alg = GKL(; orth=orth, krylovdim=n, maxiter=10, tol=tolerance(T), eager=true)
+            alg = GKL(; orth=orth, krylovdim=n, maxiter=10, tol=tolerance(T), eager=true,
+                      verbosity=0)
             S, lvecs, rvecs, info = @constinferred svdsolve(wrapop(A, Val(mode)),
                                                             wrapvec(v, Val(mode)), n₁,
                                                             :LR, alg)
diff --git a/test/testsetup.jl b/test/testsetup.jl
index 8d4bb91b..57f88b8f 100644
--- a/test/testsetup.jl
+++ b/test/testsetup.jl
@@ -1,7 +1,7 @@
 module TestSetup

 export tolerance, ≊, MinimalVec, isinplace, stack
-export wrapop, wrapvec, unwrapvec
+export wrapop, wrapvec, unwrapvec, buildrealmap

 import VectorInterface as VI
 using VectorInterface
@@ -10,7 +10,7 @@ using LinearAlgebra: LinearAlgebra
 # Utility functions
 # -----------------
 "function for determining the precision of a type"
-tolerance(T::Type{<:Number}) = eps(real(T))^(2 / 3)
+tolerance(T::Type{<:Number}) = eps(real(T))^(2 // 3)

 "function for comparing sets of eigenvalues"
 function ≊(list1::AbstractVector, list2::AbstractVector)
@@ -26,83 +26,34 @@ function ≊(list1::AbstractVector, list2::AbstractVector)
     return list1 ≈ view(list2, p)
 end

-# Minimal vector type
-# -------------------
-"""
-    MinimalVec{T<:Number,IP}
-
-Minimal interface for a vector. Can support either in-place assignments or not, depending on
-`IP=true` or `IP=false`.
-"""
-struct MinimalVec{IP,V<:AbstractVector}
-    vec::V
-    function MinimalVec{IP}(vec::V) where {IP,V}
-        return new{IP,V}(vec)
+function buildrealmap(A, B)
+    function f(x)
+        return A * x + B * conj(x)
     end
+    function f(x, ::Val{C}) where {C}
+        if C == false
+            return A * x + B * conj(x)
+        else
+            return adjoint(A) * x + transpose(B) * conj(x)
+        end
+    end
+    return f
 end

-const InplaceVec{V} = MinimalVec{true,V}
-const OutplaceVec{V} = MinimalVec{false,V}
-
-isinplace(::Type{MinimalVec{IP,V}}) where {V,IP} = IP
-isinplace(v::MinimalVec) = isinplace(typeof(v))
-
-VI.scalartype(::Type{<:MinimalVec{IP,V}}) where {IP,V} = scalartype(V)
-
-function VI.zerovector(v::MinimalVec, S::Type{<:Number})
-    return MinimalVec{isinplace(v)}(zerovector(v.vec, S))
-end
-function VI.zerovector!(v::InplaceVec{V}) where {V}
-    zerovector!(v.vec)
-    return v
-end
-VI.zerovector!!(v::MinimalVec) = isinplace(v) ? zerovector!(v) : zerovector(v)
-
-function VI.scale(v::MinimalVec, α::Number)
-    return MinimalVec{isinplace(v)}(scale(v.vec, α))
-end
-function VI.scale!(v::InplaceVec{V}, α::Number) where {V}
-    scale!(v.vec, α)
-    return v
-end
-function VI.scale!!(v::MinimalVec, α::Number)
-    return isinplace(v) ? scale!(v, α) : scale(v, α)
-end
-function VI.scale!(w::InplaceVec{V}, v::InplaceVec{W}, α::Number) where {V,W}
-    scale!(w.vec, v.vec, α)
-    return w
-end
-function VI.scale!!(w::MinimalVec, v::MinimalVec, α::Number)
-    isinplace(w) && return scale!(w, v, α)
-    return MinimalVec{false}(scale!!(copy(w.vec), v.vec, α))
-end
-
-function VI.add(y::MinimalVec, x::MinimalVec, α::Number, β::Number)
-    return MinimalVec{isinplace(y)}(add(y.vec, x.vec, α, β))
-end
-function VI.add!(y::InplaceVec{W}, x::InplaceVec{V}, α::Number, β::Number) where {W,V}
-    add!(y.vec, x.vec, α, β)
-    return y
-end
-function VI.add!!(y::MinimalVec, x::MinimalVec, α::Number, β::Number)
-    return isinplace(y) ? add!(y, x, α, β) : add(y, x, α, β)
-end
-
-VI.inner(x::MinimalVec, y::MinimalVec) = inner(x.vec, y.vec)
-VI.norm(x::MinimalVec) = LinearAlgebra.norm(x.vec)

 # Wrappers
 # --------
+using VectorInterface: MinimalSVec, MinimalMVec, MinimalVec
 # dispatch on val is necessary for type stability
 function wrapvec(v, ::Val{mode}) where {mode}
     return mode === :vector ? v :
-           mode === :inplace ? MinimalVec{true}(v) :
-           mode === :outplace ? MinimalVec{false}(v) :
-           mode === :mixed ? MinimalVec{false}(v) :
+           mode === :inplace ? MinimalMVec(v) :
+           mode === :outplace ? MinimalSVec(v) :
+           mode === :mixed ? MinimalSVec(v) :
            throw(ArgumentError("invalid mode ($mode)"))
 end
 function wrapvec2(v, ::Val{mode}) where {mode}
-    return mode === :mixed ? MinimalVec{true}(v) : wrapvec(v, mode)
+    return mode === :mixed ? MinimalMVec(v) : wrapvec(v, Val(mode))
 end

 unwrapvec(v::MinimalVec) = v.vec
@@ -129,6 +80,7 @@ end

 if VERSION < v"1.9"
     stack(f, itr) = mapreduce(f, hcat, itr)
+    stack(itr) = reduce(hcat, itr)
 end

 end
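
For reference, the real-linear solves exercised above (`reallinsolve` in test/linsolve.jl, `reallssolve` in test/lssolve.jl) both consume a map of the shape produced by the new `buildrealmap` helper: x ↦ A*x + B*conj(x) is linear over the real numbers but not over the complex numbers, so it is passed as a two-method function whose second argument selects the adjoint with respect to the real inner product. A minimal standalone sketch of that convention follows; the size `n` and the BiCGStab parameters are illustrative assumptions, not values from the test suite.

```julia
using LinearAlgebra
using KrylovKit

n = 10  # illustrative size, not the test suite's n
A = randn(ComplexF64, n, n)
B = randn(ComplexF64, n, n)

# f(x) applies x ↦ A*x + B*conj(x); f(x, Val(true)) applies the adjoint of that map
# with respect to the real inner product, mirroring the convention of `buildrealmap`.
f(x) = A * x + B * conj(x)
f(x, ::Val{C}) where {C} = C ? adjoint(A) * x + transpose(B) * conj(x) : f(x)

b = randn(ComplexF64, n)
α₀ = 1
α₁ = -1 / (maximum(abs, eigvals(A)) + maximum(abs, eigvals(B)))  # keep the α₀ term dominant
alg = BiCGStab(; maxiter=100 * n, tol=1e-10 * norm(b), verbosity=0)
x, info = reallinsolve(f, b, zero(b), alg, α₀, α₁)

# On success, x solves α₀*x + α₁*(A*x + B*conj(x)) = b, viewed as a real-linear system.
info.converged > 0 && @assert b ≈ α₀ * x + α₁ * (A * x + B * conj(x))
```

The same two-method convention carries over to rectangular maps solved in the least-squares sense with `reallssolve(f, b, alg)`, as in the LSMR tests.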