From 0732c9befdd0f03f787ecb2e956acef319a0834a Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 26 Nov 2019 22:26:21 -0600
Subject: [PATCH 001/222] Switch to FastTransforms_jll (#92)

* try using FastTransforms_jll

* preserve option to build from source, but default is false

Remove the option to download prebuilt binaries from the GitHub releases: they are built with -march=native, so they may contain CPU instructions that the user's machine does not support.

* Create .cirrus.yml

* Update README.md

* add Cirrus badge

* Update hermitetests.jl

* maintain the right to build from source

Note: the build-from-source path is not tested.

* minor release 0.8
---
 .appveyor.yml            | 18 ++++--------------
 .cirrus.yml              | 16 ++++++++++++++++
 .travis.yml              | 10 ----------
 Project.toml             |  5 +++--
 README.md                |  4 ++--
 deps/build.jl            | 23 ++---------------------
 src/FastTransforms.jl    |  9 +++++----
 src/libfasttransforms.jl | 15 ++++++++++-----
 test/hermitetests.jl     |  2 +-
 9 files changed, 43 insertions(+), 59 deletions(-)
 create mode 100644 .cirrus.yml

diff --git a/.appveyor.yml b/.appveyor.yml
index 743eee76..787c206b 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -1,18 +1,12 @@
-# Documentation: http://www.appveyor.com/docs/appveyor-yml/
-image: Visual Studio 2017
-
-cache: c:\tools\vcpkg\installed\
-
-platform: x64
-
 environment:
   matrix:
-  - julia_version: 1.0
-  - julia_version: 1.1
-  - julia_version: 1.2
   - julia_version: 1.3
   - julia_version: nightly
 
+platform:
+  - x86 # 32-bit
+  - x64 # 64-bit
+
 # # Uncomment the following lines to allow failures on nightly julia
 # # (tests will run but not make your overall status red)
 matrix:
@@ -32,10 +26,6 @@ notifications:
 
 install:
   - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1"))
-  - set PATH=%PATH%;c:\tools\vcpkg
-  - set PATH=%PATH%;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin\
-  - vcpkg install openblas:x64-windows fftw3[core,threads]:x64-windows mpfr:x64-windows mpir:x64-windows --recurse
-  - set PATH=c:\tools\vcpkg\installed\x64-windows\bin;%PATH%
 
 build_script:
   - echo "%JL_BUILD_SCRIPT%"
diff --git a/.cirrus.yml b/.cirrus.yml
new file mode 100644
index 00000000..d7ba1cf2
--- /dev/null
+++ b/.cirrus.yml
@@ -0,0 +1,16 @@
+freebsd_instance:
+  image: freebsd-12-0-release-amd64
+task:
+  name: FreeBSD
+  env:
+    matrix:
+      - JULIA_VERSION: 1.3
+      - JULIA_VERSION: nightly
+  install_script:
+    - sh -c "$(fetch https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh -o -)"
+  build_script:
+    - cirrusjl build
+  test_script:
+    - cirrusjl test
+  coverage_script:
+    - cirrusjl coverage codecov coveralls
diff --git a/.travis.yml b/.travis.yml
index bab47058..2b2d6154 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,21 +4,11 @@ os:
   - linux
   - osx
 julia:
-  - 1.0
-  - 1.1
-  - 1.2
   - 1.3
   - nightly
 matrix:
   allow_failures:
     - julia: nightly
-addons:
-  apt:
-    packages: ['libquadmath0', 'libgomp1', 'libopenblas-dev', 'libfftw3-dev', 'libmpfr-dev']
-  homebrew:
-    packages: ['gcc@8', 'fftw', 'mpfr']
-    update: true
-
 notifications:
   email: false
 after_success:
diff --git a/Project.toml b/Project.toml
index 39a2acf6..ac355807 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.7.0"
+version = "0.8.0"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -8,6 +8,7 @@ BinaryProvider = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
 DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
+FastTransforms_jll = "34b6f7d7-08f9-5794-9e10-3819e4c7e49a"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
@@ -24,4 +25,4 @@ FastGaussQuadrature = "0.4"
 Reexport = "0.2"
 SpecialFunctions = "0.8"
 ToeplitzMatrices = "0.6"
-julia = "1"
+julia = "≥ 1.3"
diff --git a/README.md b/README.md
index 6ad759d8..c3c0431c 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # FastTransforms.jl
 
-[![Travis](https://travis-ci.org/JuliaApproximation/FastTransforms.jl.svg?branch=master)](https://travis-ci.org/JuliaApproximation/FastTransforms.jl) [![AppVeyor](https://ci.appveyor.com/api/projects/status/oba9qush15q3x8pb/branch/master?svg=true)](https://ci.appveyor.com/project/MikaelSlevinsky/fasttransforms-jl/branch/master) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-latest-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/latest)
+[![Travis](https://travis-ci.org/JuliaApproximation/FastTransforms.jl.svg?branch=master)](https://travis-ci.org/JuliaApproximation/FastTransforms.jl) [![AppVeyor](https://ci.appveyor.com/api/projects/status/oba9qush15q3x8pb/branch/master?svg=true)](https://ci.appveyor.com/project/MikaelSlevinsky/fasttransforms-jl/branch/master) [![Cirrus](https://api.cirrus-ci.com/github/JuliaApproximation/FastTransforms.jl.svg)](https://cirrus-ci.com/github/JuliaApproximation/FastTransforms.jl) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-latest-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/latest)
 
 `FastTransforms.jl` allows the user to conveniently work with orthogonal polynomials with degrees well into the millions.
 
@@ -8,7 +8,7 @@ This package provides a Julia wrapper for the [C library](https://github.com/Mik
 
 ## Installation
 
-The build script, which works on macOS, Linux, and Windows systems, downloads precompiled binaries of the latest version of [FastTransforms](https://github.com/MikaelSlevinsky/FastTransforms) or builds them from source, as governed by the environment variable `FT_BUILD_FROM_SOURCE=true/false`. This library depends on `FFTW`, `MPFR`, and `OpenBLAS` (on Linux and Windows), which must be present for a successful build. With dependencies, installation may be as straightforward as:
+Installation, which uses [BinaryBuilder](https://github.com/JuliaPackaging/BinaryBuilder.jl) for Intel processors (Sandybridge and beyond), may be as straightforward as:
 
 ```julia
 pkg> add FastTransforms
diff --git a/deps/build.jl b/deps/build.jl
index 7b419f95..90d15b5d 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -1,7 +1,7 @@
 using BinaryProvider
 import Libdl
 
-version = v"0.2.9"
+version = v"0.2.12"
 
 if arch(platform_key_abi()) != :x86_64
     @warn "FastTransforms has only been tested on x86_64 architectures."
@@ -20,11 +20,7 @@ print_platform_error(p::MacOS) = "On MacOS\n\tbrew install gcc@8 fftw mpfr\n"
 print_platform_error(p::Linux) = "On Linux\n\tsudo apt-get install gcc-8 libblas-dev libopenblas-base libfftw3-dev libmpfr-dev\n"
 print_platform_error(p::Windows) = "On Windows\n\tvcpkg install openblas:x64-windows fftw3[core,threads]:x64-windows mpir:x64-windows mpfr:x64-windows\n"
 
-# Rationale is as follows: The build is pretty fast, so on Linux it is typically easiest
-# to just use the gcc of the system to build the library and include it. On MacOS, however,
-# we need to actually install a gcc first, because Apple's OS comes only shipped with clang,
-# so here we download the binary.
-ft_build_from_source = get(ENV, "FT_BUILD_FROM_SOURCE", Sys.isapple() ? "false" : "true")
+ft_build_from_source = get(ENV, "FT_BUILD_FROM_SOURCE", "false")
 if ft_build_from_source == "true"
     make = Sys.iswindows() ? "mingw32-make" : "make"
     compiler = Sys.isapple() ? "CC=gcc-8" : "CC=gcc"
@@ -51,19 +47,4 @@ if ft_build_from_source == "true"
         print_error()
     end
     println("FastTransforms built from source.")
-else
-    const GCC = BinaryProvider.detect_compiler_abi().gcc_version
-    namemap = Dict(:gcc4 => "gcc-4.9", :gcc5 => "gcc-5", :gcc6 => "gcc-6",
-                   :gcc7 => "gcc-7", :gcc8 => "gcc-8", :gcc9 => "gcc-9")
-    if !(GCC in keys(namemap))
-        error("Please ensure you have a version of gcc from gcc-4.9 to gcc-9.")
-    end
-    try
-        download("https://github.com/MikaelSlevinsky/FastTransforms/releases/download/" *
-                 "v$version/libfasttransforms.v$version.$(namemap[GCC]).$extension",
-                 joinpath(dirname(@__DIR__), "deps", "libfasttransforms.$extension"))
-    catch
-        print_error()
-    end
-    println("FastTransforms installed by downloading binaries.")
 end
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 56027628..7923287d 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,7 +1,8 @@
 module FastTransforms
 
-using DSP, FastGaussQuadrature, Libdl, LinearAlgebra, SpecialFunctions, ToeplitzMatrices
-using Reexport
+using DSP, FastGaussQuadrature, LinearAlgebra
+using Reexport, SpecialFunctions, ToeplitzMatrices
+
 @reexport using AbstractFFTs
 @reexport using FFTW
 
@@ -19,11 +20,11 @@ import AbstractFFTs: Plan, ScaledPlan,
                      rfft_output_size, brfft_output_size,
                      plan_inv, normalization
 
+import DSP: conv
+
 import FFTW: dct, dct!, idct, idct!, plan_dct!, plan_idct!,
              plan_dct, plan_idct, fftwNumber
 
-import DSP: conv
-
 import FastGaussQuadrature: unweightedgausshermite
 
 import LinearAlgebra: mul!, lmul!, ldiv!
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 0c7d4207..d1fa1bc1 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -1,8 +1,13 @@
-const libfasttransforms = find_library("libfasttransforms", [joinpath(dirname(@__DIR__), "deps")])
-
-if libfasttransforms ≡ nothing || length(libfasttransforms) == 0
-    error("FastTransforms is not properly installed. Please run Pkg.build(\"FastTransforms\") ",
-          "and restart Julia.")
+ft_build_from_source = get(ENV, "FT_BUILD_FROM_SOURCE", "false")
+if ft_build_from_source == "true"
+    using Libdl
+    const libfasttransforms = find_library("libfasttransforms", [joinpath(dirname(@__DIR__), "deps")])
+    if libfasttransforms ≡ nothing || length(libfasttransforms) == 0
+        error("FastTransforms is not properly installed. Please run Pkg.build(\"FastTransforms\") ",
+              "and restart Julia.")
+    end
+else
+    using FastTransforms_jll
 end
 
 function ft_fftw_plan_with_nthreads(n::Integer)
diff --git a/test/hermitetests.jl b/test/hermitetests.jl
index c931d190..2e0a7f1a 100644
--- a/test/hermitetests.jl
+++ b/test/hermitetests.jl
@@ -7,7 +7,7 @@ hermitepoints(n) = FastGaussQuadrature.unweightedgausshermite( n )[1]
     @test hermitepoints(100_000)[end] ≈ 446.9720305443094
 
     @test weightedhermitetransform([1.0]) == [1.0]
-    @test weightedhermitetransform(exp.(-hermitepoints(2).^2/2)) == [1.0,0.0]
+    @test weightedhermitetransform(exp.(-hermitepoints(2).^2/2)) ≈ [1.0,0.0]
     @test weightedhermitetransform(exp.(-hermitepoints(3).^2/2)) ≈ [1.0,0.0,0.0]
     @test weightedhermitetransform(exp.(-hermitepoints(1000).^2/2)) ≈ [1.0; zeros(999)]
     @test weightedhermitetransform(exp.(-hermitepoints(3000).^2/2)) ≈ [1.0; zeros(2999)]

From 12656f4b5bf23d59c894cf3d4ecd4fcb5ba68ba8 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 26 Nov 2019 22:31:05 -0600
Subject: [PATCH 002/222] upper bound julia and FastTransforms_jll

---
 Project.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index ac355807..61aacbac 100644
--- a/Project.toml
+++ b/Project.toml
@@ -22,7 +22,8 @@ BinaryProvider = "0.5.8"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
+FastTransforms_jll = "0.2.12"
 Reexport = "0.2"
 SpecialFunctions = "0.8"
 ToeplitzMatrices = "0.6"
-julia = "≥ 1.3"
+julia = "1.3"

From 3f1820fbafa847d17253ce96081bc9bb21a022c2 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 27 Nov 2019 09:45:39 +0000
Subject: [PATCH 003/222] CompatHelper: bump compat for "SpecialFunctions" to
 "0.9" (#93)

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 61aacbac..3cc0a47b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -24,6 +24,6 @@ FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.2.12"
 Reexport = "0.2"
-SpecialFunctions = "0.8"
+SpecialFunctions = "0.8, 0.9"
 ToeplitzMatrices = "0.6"
 julia = "1.3"

From 04a2885f482eb8d3064fa84f33f149a72379ca1a Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Wed, 27 Nov 2019 09:46:02 +0000
Subject: [PATCH 004/222] v0.8.1

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 3cc0a47b..c90d1742 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.8.0"
+version = "0.8.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From 6c90d83f86c2566229a8002de1aac67cfb3b9043 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Wed, 27 Nov 2019 15:08:46 +0000
Subject: [PATCH 005/222] Work around broken DSP

---
 src/FastTransforms.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 7923287d..d00ee88e 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,8 +1,10 @@
 module FastTransforms
 
-using DSP, FastGaussQuadrature, LinearAlgebra
+using FastGaussQuadrature, LinearAlgebra
 using Reexport, SpecialFunctions, ToeplitzMatrices
 
+import DSP
+
 @reexport using AbstractFFTs
 @reexport using FFTW
 

From f83cd2e874f0cf11465363b34e4a0024039dbf72 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 3 Dec 2019 22:45:55 +0000
Subject: [PATCH 006/222] syntactic sugar for p*I

---
 src/libfasttransforms.jl       |  7 +++++++
 test/libfasttransformstests.jl | 24 ++++++++++++------------
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index d1fa1bc1..447c2ac4 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -492,6 +492,13 @@ end
 \(p::AdjointFTPlan{T}, x::Array{T}) where T = ldiv!(p, deepcopy(x))
 \(p::TransposeFTPlan{T}, x::Array{T}) where T = ldiv!(p, deepcopy(x))
 
+*(p::FTPlan{T, 1}, x::UniformScaling{S}) where {T, S} = lmul!(p, Matrix{promote_type(T, S)}(x, p.n, p.n))
+*(p::AdjointFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = lmul!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n))
+*(p::TransposeFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = lmul!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n))
+\(p::FTPlan{T, 1}, x::UniformScaling{S}) where {T, S} = ldiv!(p, Matrix{promote_type(T, S)}(x, p.n, p.n))
+\(p::AdjointFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = ldiv!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n))
+\(p::TransposeFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = ldiv!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n))
+
 for (fJ, fC, elty) in ((:lmul!, :ft_bfmvf, :Float32),
                        (:ldiv!, :ft_bfsvf, :Float32),
                        (:lmul!, :ft_bfmv , :Float64),
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 00ca4f4f..9daee2a2 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -32,23 +32,23 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
             y = p2'\z
             z = p2\y
             @test z ≈ x
-            P = p1*Id
+            P = p1*I
             Q = p2*P
-            @test Q ≈ Id
-            P = p1*Id
+            @test Q ≈ I
+            P = p1*I
             Q = p1'P
             P = transpose(p1)*Q
             Q = transpose(p1)\P
             P = p1'\Q
             Q = p1\P
-            @test Q ≈ Id
-            P = p2*Id
+            @test Q ≈ I
+            P = p2*I
             Q = p2'P
             P = transpose(p2)*Q
             Q = transpose(p2)\P
             P = p2'\Q
             Q = p2\P
-            @test Q ≈ Id
+            @test Q ≈ I
         end
     end
 
@@ -79,23 +79,23 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
             y = p2'\z
             z = p2\y
             @test z ≈ x
-            P = p1*Id
+            P = p1*I
             Q = p2*P
-            @test_skip Q ≈ Id
-            P = p1*Id
+            @test_skip Q ≈ I
+            P = p1*I
             Q = p1'P
             P = transpose(p1)*Q
             Q = transpose(p1)\P
             P = p1'\Q
             Q = p1\P
-            @test_skip Q ≈ Id
-            P = p2*Id
+            @test_skip Q ≈ I
+            P = p2*I
             Q = p2'P
             P = transpose(p2)*Q
             Q = transpose(p2)\P
             P = p2'\Q
             Q = p2\P
-            @test_skip Q ≈ Id
+            @test_skip Q ≈ I
         end
     end
 

From 99ad9ad2612823b8b7577fcd4756a7edca135b49 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Fri, 6 Dec 2019 00:21:38 +0000
Subject: [PATCH 007/222] Close #95

---
 test/libfasttransformstests.jl | 198 +++++++++++++++------------------
 test/runtests.jl               |  10 +-
 2 files changed, 95 insertions(+), 113 deletions(-)

diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 9daee2a2..6492a402 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -5,6 +5,43 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
 @testset "libfasttransforms" begin
     n = 64
     α, β, γ, δ, λ, μ = 0.1, 0.2, 0.3, 0.4, 0.5, 0.6
+    function test_1d_plans(p1, p2, x; skip::Bool=false)
+        y = p1*x
+        z = p2*y
+        @test z ≈ x
+        y = p1*x
+        z = p1'y
+        y = transpose(p1)*z
+        z = transpose(p1)\y
+        y = p1'\z
+        z = p1\y
+        @test z ≈ x
+        y = p2*x
+        z = p2'y
+        y = transpose(p2)*z
+        z = transpose(p2)\y
+        y = p2'\z
+        z = p2\y
+        @test z ≈ x
+        P = p1*I
+        Q = p2*P
+        skip ? (@test_skip Q ≈ I) : (@test Q ≈ I)
+        P = p1*I
+        Q = p1'P
+        P = transpose(p1)*Q
+        Q = transpose(p1)\P
+        P = p1'\Q
+        Q = p1\P
+        skip ? (@test_skip Q ≈ I) : (@test Q ≈ I)
+        P = p2*I
+        Q = p2'P
+        P = transpose(p2)*Q
+        Q = transpose(p2)\P
+        P = p2'\Q
+        Q = p2\P
+        skip ? (@test_skip Q ≈ I) : (@test Q ≈ I)
+    end
+
     for T in (Float32, Float64, Complex{Float32}, Complex{Float64})
         x = T(1)./(1:n)
         Id = Matrix{T}(I, n, n)
@@ -15,40 +52,7 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
                           (plan_jac2ultra(Id, α, β, λ), plan_ultra2jac(Id, λ, α, β)),
                           (plan_jac2cheb(Id, α, β), plan_cheb2jac(Id, α, β)),
                           (plan_ultra2cheb(Id, λ), plan_cheb2ultra(Id, λ)))
-            y = p1*x
-            z = p2*y
-            @test z ≈ x
-            y = p1*x
-            z = p1'y
-            y = transpose(p1)*z
-            z = transpose(p1)\y
-            y = p1'\z
-            z = p1\y
-            @test z ≈ x
-            y = p2*x
-            z = p2'y
-            y = transpose(p2)*z
-            z = transpose(p2)\y
-            y = p2'\z
-            z = p2\y
-            @test z ≈ x
-            P = p1*I
-            Q = p2*P
-            @test Q ≈ I
-            P = p1*I
-            Q = p1'P
-            P = transpose(p1)*Q
-            Q = transpose(p1)\P
-            P = p1'\Q
-            Q = p1\P
-            @test Q ≈ I
-            P = p2*I
-            Q = p2'P
-            P = transpose(p2)*Q
-            Q = transpose(p2)\P
-            P = p2'\Q
-            Q = p2\P
-            @test Q ≈ I
+            test_1d_plans(p1, p2, x)
         end
     end
 
@@ -62,87 +66,69 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
                           (plan_jac2ultra(Id, α, β, λ), plan_ultra2jac(Id, λ, α, β)),
                           (plan_jac2cheb(Id, α, β), plan_cheb2jac(Id, α, β)),
                           (plan_ultra2cheb(Id, λ), plan_cheb2ultra(Id, λ)))
-            y = p1*x
-            z = p2*y
-            @test z ≈ x
-            y = p1*x
-            z = p1'y
-            y = transpose(p1)*z
-            z = transpose(p1)\y
-            y = p1'\z
-            z = p1\y
-            @test z ≈ x
-            y = p2*x
-            z = p2'y
-            y = transpose(p2)*z
-            z = transpose(p2)\y
-            y = p2'\z
-            z = p2\y
-            @test z ≈ x
-            P = p1*I
-            Q = p2*P
-            @test_skip Q ≈ I
-            P = p1*I
-            Q = p1'P
-            P = transpose(p1)*Q
-            Q = transpose(p1)\P
-            P = p1'\Q
-            Q = p1\P
-            @test_skip Q ≈ I
-            P = p2*I
-            Q = p2'P
-            P = transpose(p2)*Q
-            Q = transpose(p2)\P
-            P = p2'\Q
-            Q = p2\P
-            @test_skip Q ≈ I
+            test_1d_plans(p1, p2, x; skip=true)
         end
     end
 
-    for T in (Float64, Complex{Float64})
-        A = T <: Real ? sphones(T, n, 2n-1) : sphones(T, n, 2n-1) + im*sphones(T, n, 2n-1)
-        p = plan_sph2fourier(A)
-        ps = plan_sph_synthesis(A)
-        pa = plan_sph_analysis(A)
+    function test_nd_plans(p, ps, pa, A)
         B = copy(A)
         C = ps*(p*A)
         A = p\(pa*C)
         @test A ≈ B
+    end
 
-        A = T <: Real ? sphvones(T, n, 2n-1) : sphvones(T, n, 2n-1) + im*sphvones(T, n, 2n-1)
-        p = plan_sphv2fourier(A)
-        ps = plan_sphv_synthesis(A)
-        pa = plan_sphv_analysis(A)
-        B = copy(A)
-        C = ps*(p*A)
-        A = p\(pa*C)
-        @test A ≈ B
+    A = sphones(Float64, n, 2n-1)
+    p = plan_sph2fourier(A)
+    ps = plan_sph_synthesis(A)
+    pa = plan_sph_analysis(A)
+    test_nd_plans(p, ps, pa, A)
+    A = sphones(Float64, n, 2n-1) + im*sphones(Float64, n, 2n-1)
+    p = plan_sph2fourier(A)
+    ps = plan_sph_synthesis(A)
+    pa = plan_sph_analysis(A)
+    test_nd_plans(p, ps, pa, A)
 
-        A = T <: Real ? diskones(T, n, 4n-3) : diskones(T, n, 4n-3) + im*diskones(T, n, 4n-3)
-        p = plan_disk2cxf(A)
-        ps = plan_disk_synthesis(A)
-        pa = plan_disk_analysis(A)
-        B = copy(A)
-        C = ps*(p*A)
-        A = p\(pa*C)
-        @test A ≈ B
+    A = sphvones(Float64, n, 2n-1)
+    p = plan_sphv2fourier(A)
+    ps = plan_sphv_synthesis(A)
+    pa = plan_sphv_analysis(A)
+    test_nd_plans(p, ps, pa, A)
+    A = sphvones(Float64, n, 2n-1) + im*sphvones(Float64, n, 2n-1)
+    p = plan_sphv2fourier(A)
+    ps = plan_sphv_synthesis(A)
+    pa = plan_sphv_analysis(A)
+    test_nd_plans(p, ps, pa, A)
 
-        A = T <: Real ? triones(T, n, n) : triones(T, n, n) + im*triones(T, n, n)
-        p = plan_tri2cheb(A, α, β, γ)
-        ps = plan_tri_synthesis(A)
-        pa = plan_tri_analysis(A)
-        B = copy(A)
-        C = ps*(p*A)
-        A = p\(pa*C)
-        @test A ≈ B
+    A = diskones(Float64, n, 4n-3)
+    p = plan_disk2cxf(A)
+    ps = plan_disk_synthesis(A)
+    pa = plan_disk_analysis(A)
+    test_nd_plans(p, ps, pa, A)
+    A = diskones(Float64, n, 4n-3) + im*diskones(Float64, n, 4n-3)
+    p = plan_disk2cxf(A)
+    ps = plan_disk_synthesis(A)
+    pa = plan_disk_analysis(A)
+    test_nd_plans(p, ps, pa, A)
 
-        A = T <: Real ? tetones(T, n, n, n) : tetones(T, n, n, n) + im*tetones(T, n, n, n)
-        p = plan_tet2cheb(A, α, β, γ, δ)
-        ps = plan_tet_synthesis(A)
-        pa = plan_tet_analysis(A)
-        B = copy(A)
-        C = ps*(p*A)
-        A = p\(pa*C)
-        @test A ≈ B
-    end
+    A = triones(Float64, n, n)
+    p = plan_tri2cheb(A, α, β, γ)
+    ps = plan_tri_synthesis(A)
+    pa = plan_tri_analysis(A)
+    test_nd_plans(p, ps, pa, A)
+    A = triones(Float64, n, n) + im*triones(Float64, n, n)
+    p = plan_tri2cheb(A, α, β, γ)
+    ps = plan_tri_synthesis(A)
+    pa = plan_tri_analysis(A)
+    test_nd_plans(p, ps, pa, A)
+
+    A = tetones(Float64, n, n, n)
+    p = plan_tet2cheb(A, α, β, γ, δ)
+    ps = plan_tet_synthesis(A)
+    pa = plan_tet_analysis(A)
+    test_nd_plans(p, ps, pa, A)
+    A = tetones(Float64, n, n, n) + im*tetones(Float64, n, n, n)
+    p = plan_tet2cheb(A, α, β, γ, δ)
+    ps = plan_tet_synthesis(A)
+    pa = plan_tet_analysis(A)
+    test_nd_plans(p, ps, pa, A)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 0d5c4592..e5e341bd 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,4 +1,4 @@
-using FastTransforms, Libdl, LinearAlgebra, Test
+using FastTransforms, LinearAlgebra, Test
 
 include("specialfunctionstests.jl")
 
@@ -6,16 +6,12 @@ include("chebyshevtests.jl")
 
 include("quadraturetests.jl")
 
-if find_library(FastTransforms.libfasttransforms) ≡ FastTransforms.libfasttransforms
-    include("libfasttransformstests.jl")
-else
-    error("FastTransforms is not properly installed. Please run Pkg.build(\"FastTransforms\") ",
-          "and restart Julia.")
-end
+include("libfasttransformstests.jl")
 
 include("nuffttests.jl")
 
 include("fftBigFloattests.jl")
+
 include("paduatests.jl")
 
 include("gaunttests.jl")

From 56aff53cbf493b935b251bbf013d3ed6ba816157 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 14 Jan 2020 18:55:47 -0600
Subject: [PATCH 008/222] Close
 https://github.com/JuliaCI/BaseBenchmarks.jl/issues/253

See also:

https://github.com/MikaelSlevinsky/FastTransforms/pull/42

https://github.com/JuliaPackaging/Yggdrasil/pull/384
---
 Project.toml  | 4 ++--
 deps/build.jl | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Project.toml b/Project.toml
index c90d1742..910a073b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.8.1"
+version = "0.8.2"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -22,7 +22,7 @@ BinaryProvider = "0.5.8"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.2.12"
+FastTransforms_jll = "0.2.13"
 Reexport = "0.2"
 SpecialFunctions = "0.8, 0.9"
 ToeplitzMatrices = "0.6"
diff --git a/deps/build.jl b/deps/build.jl
index 90d15b5d..8300b133 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -1,7 +1,7 @@
 using BinaryProvider
 import Libdl
 
-version = v"0.2.12"
+version = v"0.2.13"
 
 if arch(platform_key_abi()) != :x86_64
     @warn "FastTransforms has only been tested on x86_64 architectures."

From 7b993c587c85077d1afeddc755b6268f0b0ba538 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 14 Jan 2020 19:31:48 -0600
Subject: [PATCH 009/222] allow failures on cirrusci for freebsd

---
 .cirrus.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.cirrus.yml b/.cirrus.yml
index d7ba1cf2..d3a93505 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -6,6 +6,7 @@ task:
     matrix:
       - JULIA_VERSION: 1.3
       - JULIA_VERSION: nightly
+  allow_failures: $JULIA_VERSION == 'nightly'
   install_script:
     - sh -c "$(fetch https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh -o -)"
   build_script:

From 2305f89abb4610753c2f044d39c996c652b4e80e Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 14 Jan 2020 19:34:06 -0600
Subject: [PATCH 010/222] CompatHelper: bump compat for "SpecialFunctions" to
 "0.10" (#99)

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 910a073b..2af83a6f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -24,6 +24,6 @@ FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.2.13"
 Reexport = "0.2"
-SpecialFunctions = "0.8, 0.9"
+SpecialFunctions = "0.8, 0.9, 0.10"
 ToeplitzMatrices = "0.6"
 julia = "1.3"

From a1b5b352807ff12a32e401218720bb71a83ab625 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Mon, 27 Jan 2020 09:48:18 -0600
Subject: [PATCH 011/222] Test on 1.4

---
 .appveyor.yml | 1 +
 .cirrus.yml   | 1 +
 .travis.yml   | 1 +
 3 files changed, 3 insertions(+)

diff --git a/.appveyor.yml b/.appveyor.yml
index 787c206b..06bb417a 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -1,6 +1,7 @@
 environment:
   matrix:
   - julia_version: 1.3
+  - julia_version: 1.4
   - julia_version: nightly
 
 platform:
diff --git a/.cirrus.yml b/.cirrus.yml
index d3a93505..fa4972fa 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -5,6 +5,7 @@ task:
   env:
     matrix:
       - JULIA_VERSION: 1.3
+      - JULIA_VERSION: 1.4
       - JULIA_VERSION: nightly
   allow_failures: $JULIA_VERSION == 'nightly'
   install_script:
diff --git a/.travis.yml b/.travis.yml
index 2b2d6154..144b82cb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,6 +5,7 @@ os:
   - osx
 julia:
   - 1.3
+  - 1.4
   - nightly
 matrix:
   allow_failures:

From 84be69fff20577a6335916d7a20487f2c40c66fe Mon Sep 17 00:00:00 2001
From: Julia TagBot <50554310+JuliaTagBot@users.noreply.github.com>
Date: Sun, 9 Feb 2020 04:27:32 +0700
Subject: [PATCH 012/222] Install TagBot as a GitHub Action (#100)

---
 .github/workflows/TagBot.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 .github/workflows/TagBot.yml

diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
new file mode 100644
index 00000000..d77d3a0c
--- /dev/null
+++ b/.github/workflows/TagBot.yml
@@ -0,0 +1,11 @@
+name: TagBot
+on:
+  schedule:
+    - cron: 0 * * * *
+jobs:
+  TagBot:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: JuliaRegistries/TagBot@v1
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}

From 488993150e22052647cc9ed282e06a0cc7cfb1ff Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Sun, 26 Apr 2020 20:39:40 -0500
Subject: [PATCH 013/222] Add spin-weighted spherical harmonics (#103)

* add spin-weighted spherical harmonics

* update freebsd image

* add horner and clenshaw routines

The new Horner and Clenshaw routines are not exported.
---
 .cirrus.yml                    |   2 +-
 Project.toml                   |   4 +-
 README.md                      |   2 +-
 examples/spinweighted.jl       |  64 ++++++++++++++++++
 src/FastTransforms.jl          |   6 +-
 src/libfasttransforms.jl       | 116 +++++++++++++++++++++++++++++----
 src/specialfunctions.jl        |  47 +++++++++++++
 test/libfasttransformstests.jl |  27 +++++++-
 8 files changed, 249 insertions(+), 19 deletions(-)
 create mode 100644 examples/spinweighted.jl

diff --git a/.cirrus.yml b/.cirrus.yml
index fa4972fa..d7653148 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -1,5 +1,5 @@
 freebsd_instance:
-  image: freebsd-12-0-release-amd64
+  image: freebsd-12-1-release-amd64
 task:
   name: FreeBSD
   env:
diff --git a/Project.toml b/Project.toml
index 2af83a6f..a01378a2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.8.2"
+version = "0.9.0"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -22,7 +22,7 @@ BinaryProvider = "0.5.8"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.2.13"
+FastTransforms_jll = "0.3.0"
 Reexport = "0.2"
 SpecialFunctions = "0.8, 0.9, 0.10"
 ToeplitzMatrices = "0.6"
diff --git a/README.md b/README.md
index c3c0431c..e746e1b0 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ julia> using FastTransforms, LinearAlgebra
 
 ## Fast orthogonal polynomial transforms
 
-The 26 orthogonal polynomial transforms are listed in `FastTransforms.kind2string.(0:25)`. Univariate transforms may be planned with the standard normalization or with orthonormalization. For multivariate transforms, the standard normalization may be too severe for floating-point computations, so it is omitted. Here are two examples:
+The 29 orthogonal polynomial transforms are listed in `FastTransforms.kind2string.(0:28)`. Univariate transforms may be planned with the standard normalization or with orthonormalization. For multivariate transforms, the standard normalization may be too severe for floating-point computations, so it is omitted. Here are two examples:
 
 ### The Chebyshev--Legendre transform
 
diff --git a/examples/spinweighted.jl b/examples/spinweighted.jl
new file mode 100644
index 00000000..344a0785
--- /dev/null
+++ b/examples/spinweighted.jl
@@ -0,0 +1,64 @@
+#############
+# This example plays with analysis of:
+#
+#   f(r) = e^{i k⋅r},
+#
+# for some k ∈ ℝ³ and where r ∈ 𝕊², using spin-0 spherical harmonics.
+#
+# It applies ð, the spin-raising operator,
+# both to the spin-0 coefficients and to the original function,
+# and then uses a spin-1 analysis of the latter to compare coefficients.
+#
+# See also sphere.jl
+# For the storage pattern of the arrays, please consult the documentation.
+#############
+
+using FastTransforms, LinearAlgebra
+
+# The colatitudinal grid (in units of π):
+N = 10
+θ = (0.5:N-0.5)/N
+
+# The longitudinal grid (in units of π):
+M = 2*N-1
+φ = (0:M-1)*2/M
+
+k = [2/7, 3/7, 6/7]
+r = (θ,φ) -> [sinpi(θ)*cospi(φ), sinpi(θ)*sinpi(φ), cospi(θ)]
+
+# On the tensor product grid, our function samples are:
+
+F = [exp(im*(k⋅r(θ,φ))) for θ in θ, φ in φ]
+
+P = plan_spinsph2fourier(F, 0)
+PA = plan_spinsph_analysis(F, 0)
+
+# Its spin-0 spherical harmonic coefficients are:
+
+U⁰ = P\(PA*F)
+
+norm(U⁰) ≈ sqrt(4π)
+
+# Spin can be incremented by applying ð, either on the spin-0 coefficients:
+
+U¹c = zero(U⁰)
+for n in 1:N-1
+    U¹c[n, 1] = sqrt(n*(n+1))*U⁰[n+1, 1]
+end
+for m in 1:M÷2
+    for n in 0:N-1
+        U¹c[n+1, 2m] = -sqrt((n+m)*(n+m+1))*U⁰[n+1, 2m]
+        U¹c[n+1, 2m+1] = sqrt((n+m)*(n+m+1))*U⁰[n+1, 2m+1]
+    end
+end
+
+# or on the original function through analysis with spin-1 spherical harmonics:
+
+F = [-(k[1]*(im*cospi(θ)*cospi(φ) + sinpi(φ)) + k[2]*(im*cospi(θ)*sinpi(φ)-cospi(φ)) - im*k[3]*sinpi(θ))*exp(im*(k⋅r(θ,φ))) for θ in θ, φ in φ]
+
+P = plan_spinsph2fourier(F, 1)
+PA = plan_spinsph_analysis(F, 1)
+
+U¹s = P\(PA*F)
+
+norm(U¹c) ≈ norm(U¹s) ≈ sqrt(8π/3*(k⋅k))
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index d00ee88e..f3ea069f 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -44,7 +44,8 @@ export plan_leg2cheb, plan_cheb2leg, plan_ultra2ultra, plan_jac2jac,
        plan_sphv2fourier, plan_sphv_synthesis, plan_sphv_analysis,
        plan_disk2cxf, plan_disk_synthesis, plan_disk_analysis,
        plan_tri2cheb, plan_tri_synthesis, plan_tri_analysis,
-       plan_tet2cheb, plan_tet_synthesis, plan_tet_analysis
+       plan_tet2cheb, plan_tet_synthesis, plan_tet_analysis,
+       plan_spinsph2fourier, plan_spinsph_synthesis, plan_spinsph_analysis
 
 include("libfasttransforms.jl")
 
@@ -87,7 +88,8 @@ export sphones, sphzeros, sphrand, sphrandn, sphevaluate,
        sphvones, sphvzeros, sphvrand, sphvrandn,
        diskones, diskzeros, diskrand, diskrandn,
        triones, trizeros, trirand, trirandn, trievaluate,
-       tetones, tetzeros, tetrand, tetrandn
+       tetones, tetzeros, tetrand, tetrandn,
+       spinsphones, spinsphzeros, spinsphrand, spinsphrandn
 
 lgamma(x) = logabsgamma(x)[1]
 
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 447c2ac4..4cbff2ac 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -43,6 +43,38 @@ end
 
 set_num_threads(n::Integer) = ccall((:ft_set_num_threads, libfasttransforms), Cvoid, (Cint, ), n)
 
+function horner!(c::Vector{Float64}, x::Vector{Float64}, f::Vector{Float64})
+    @assert length(x) == length(f)
+    ccall((:ft_horner, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, 1, length(x), x, f)
+end
+
+function horner!(c::Vector{Float32}, x::Vector{Float32}, f::Vector{Float32})
+    @assert length(x) == length(f)
+    ccall((:ft_hornerf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, 1, length(x), x, f)
+end
+
+function clenshaw!(c::Vector{Float64}, x::Vector{Float64}, f::Vector{Float64})
+    @assert length(x) == length(f)
+    ccall((:ft_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, 1, length(x), x, f)
+end
+
+function clenshaw!(c::Vector{Float32}, x::Vector{Float32}, f::Vector{Float32})
+    @assert length(x) == length(f)
+    ccall((:ft_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, 1, length(x), x, f)
+end
+
+function clenshaw!(c::Vector{Float64}, A::Vector{Float64}, B::Vector{Float64}, C::Vector{Float64}, x::Vector{Float64}, phi0::Vector{Float64}, f::Vector{Float64})
+    @assert length(c) == length(A) == length(B) == length(C)-1
+    @assert length(x) == length(phi0) == length(f)
+    ccall((:ft_orthogonal_polynomial_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Cint, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}), length(c), c, 1, A, B, C, length(x), x, phi0, f)
+end
+
+function clenshaw!(c::Vector{Float32}, A::Vector{Float32}, B::Vector{Float32}, C::Vector{Float32}, x::Vector{Float32}, phi0::Vector{Float32}, f::Vector{Float32})
+    @assert length(c) == length(A) == length(B) == length(C)-1
+    @assert length(x) == length(phi0) == length(f)
+    ccall((:ft_orthogonal_polynomial_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cint, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}), length(c), c, 1, A, B, C, length(x), x, phi0, f)
+end
+
 const LEG2CHEB              = 0
 const CHEB2LEG              = 1
 const ULTRA2ULTRA           = 2
@@ -59,16 +91,20 @@ const SPHEREV              = 12
 const DISK                 = 13
 const TRIANGLE             = 14
 const TETRAHEDRON          = 15
-const SPHERESYNTHESIS      = 16
-const SPHEREANALYSIS       = 17
-const SPHEREVSYNTHESIS     = 18
-const SPHEREVANALYSIS      = 19
-const DISKSYNTHESIS        = 20
-const DISKANALYSIS         = 21
-const TRIANGLESYNTHESIS    = 22
-const TRIANGLEANALYSIS     = 23
-const TETRAHEDRONSYNTHESIS = 24
-const TETRAHEDRONANALYSIS  = 25
+const SPINSPHERE           = 16
+const SPHERESYNTHESIS      = 17
+const SPHEREANALYSIS       = 18
+const SPHEREVSYNTHESIS     = 19
+const SPHEREVANALYSIS      = 20
+const DISKSYNTHESIS        = 21
+const DISKANALYSIS         = 22
+const TRIANGLESYNTHESIS    = 23
+const TRIANGLEANALYSIS     = 24
+const TETRAHEDRONSYNTHESIS = 25
+const TETRAHEDRONANALYSIS  = 26
+const SPINSPHERESYNTHESIS  = 27
+const SPINSPHEREANALYSIS   = 28
+
 
 let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                CHEB2LEG             => "Chebyshev--Legendre",
@@ -86,6 +122,7 @@ let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                DISK                 => "Zernike--Chebyshev×Fourier",
                TRIANGLE             => "Proriol--Chebyshev²",
                TETRAHEDRON          => "Proriol--Chebyshev³",
+               SPINSPHERE           => "Spin-weighted spherical harmonic--Fourier",
                SPHERESYNTHESIS      => "FFTW Fourier synthesis on the sphere",
                SPHEREANALYSIS       => "FFTW Fourier analysis on the sphere",
                SPHEREVSYNTHESIS     => "FFTW Fourier synthesis on the sphere (vector field)",
@@ -95,7 +132,9 @@ let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                TRIANGLESYNTHESIS    => "FFTW Chebyshev synthesis on the triangle",
                TRIANGLEANALYSIS     => "FFTW Chebyshev analysis on the triangle",
                TETRAHEDRONSYNTHESIS => "FFTW Chebyshev synthesis on the tetrahedron",
-               TETRAHEDRONANALYSIS  => "FFTW Chebyshev analysis on the tetrahedron")
+               TETRAHEDRONANALYSIS  => "FFTW Chebyshev analysis on the tetrahedron",
+               SPINSPHERESYNTHESIS  => "FFTW Fourier synthesis on the sphere (spin-weighted)",
+               SPINSPHEREANALYSIS   => "FFTW Fourier analysis on the sphere (spin-weighted)")
     global kind2string
     kind2string(k::Integer) = k2s[Int(k)]
 end
@@ -132,6 +171,7 @@ show(io::IO, p::FTPlan{T, 2, SPHEREV}) where T = print(io, "FastTransforms ", ki
 show(io::IO, p::FTPlan{T, 2, DISK}) where T = print(io, "FastTransforms ", kind2string(DISK), " plan for $(p.n)×$(4p.n-3)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, TRIANGLE}) where T = print(io, "FastTransforms ", kind2string(TRIANGLE), " plan for $(p.n)×$(p.n)-element array of ", T)
 show(io::IO, p::FTPlan{T, 3, TETRAHEDRON}) where T = print(io, "FastTransforms ", kind2string(TETRAHEDRON), " plan for $(p.n)×$(p.n)×$(p.n)-element array of ", T)
+show(io::IO, p::FTPlan{T, 2, SPINSPHERE}) where T = print(io, "FastTransforms ", kind2string(SPINSPHERE), " plan for $(p.n)×$(2p.n-1)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, K}) where {T, K} = print(io, "FastTransforms plan for ", kind2string(K), " for $(p.n)×$(p.m)-element array of ", T)
 show(io::IO, p::FTPlan{T, 3, K}) where {T, K} = print(io, "FastTransforms plan for ", kind2string(K), " for $(p.n)×$(p.l)×$(p.m)-element array of ", T)
 
@@ -141,7 +181,7 @@ function checksize(p::FTPlan{T}, x::Array{T}) where T
     end
 end
 
-for K in (SPHERE, SPHEREV, DISK)
+for K in (SPHERE, SPHEREV, DISK, SPINSPHERE)
     @eval function checksize(p::FTPlan{T, 2, $K}, x::Matrix{T}) where T
         if p.n != size(x, 1)
             throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.n), x has leading dimension $(size(x, 1))"))
@@ -160,6 +200,7 @@ destroy_plan(p::FTPlan{Float64, 1}) = ccall((:ft_destroy_tb_eigen_FMM, libfasttr
 destroy_plan(p::FTPlan{BigFloat, 1}) = ccall((:ft_mpfr_destroy_plan, libfasttransforms), Cvoid, (Ptr{mpfr_t}, Cint), p, p.n)
 destroy_plan(p::FTPlan{Float64, 2}) = ccall((:ft_destroy_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 3}) = ccall((:ft_destroy_tetrahedral_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHERE}) = ccall((:ft_destroy_spin_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, SPHERESYNTHESIS}) = ccall((:ft_destroy_sphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, SPHEREANALYSIS}) = ccall((:ft_destroy_sphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, SPHEREVSYNTHESIS}) = ccall((:ft_destroy_sphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
@@ -170,6 +211,8 @@ destroy_plan(p::FTPlan{Float64, 2, TRIANGLESYNTHESIS}) = ccall((:ft_destroy_tria
 destroy_plan(p::FTPlan{Float64, 2, TRIANGLEANALYSIS}) = ccall((:ft_destroy_triangle_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 3, TETRAHEDRONSYNTHESIS}) = ccall((:ft_destroy_tetrahedron_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 3, TETRAHEDRONANALYSIS}) = ccall((:ft_destroy_tetrahedron_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHERESYNTHESIS}) = ccall((:ft_destroy_spinsphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHEREANALYSIS}) = ccall((:ft_destroy_spinsphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 
 struct AdjointFTPlan{T, S}
     parent::S
@@ -233,6 +276,9 @@ for (f, plan_f) in ((:fourier2sph, :plan_sph2fourier), (:fourier2sphv, :plan_sph
     end
 end
 
+plan_spinsph2fourier(x::AbstractArray{T}, y...; z...) where T = plan_spinsph2fourier(T, size(x, 1), y...; z...)
+spinsph2fourier(x::AbstractArray, y...; z...) = plan_spinsph2fourier(x, y...; z...)*x
+fourier2spinsph(x::AbstractArray, y...; z...) = plan_spinsph2fourier(x, y...; z...)\x
 
 function plan_leg2cheb(::Type{Float32}, n::Integer; normleg::Bool=false, normcheb::Bool=false)
     plan = ccall((:ft_plan_legendre_to_chebyshevf, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), normleg, normcheb, n)
@@ -427,6 +473,11 @@ function plan_tet2cheb(::Type{Float64}, n::Integer, α, β, γ, δ)
     return FTPlan{Float64, 3, TETRAHEDRON}(plan, n)
 end
 
+function plan_spinsph2fourier(::Type{Complex{Float64}}, n::Integer, s::Integer)
+    plan = ccall((:ft_plan_spinsph2fourier, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint), n, s)
+    return FTPlan{Complex{Float64}, 2, SPINSPHERE}(plan, n)
+end
+
 for (fJ, fC, fE, K) in ((:plan_sph_synthesis, :ft_plan_sph_synthesis, :ft_execute_sph_synthesis, SPHERESYNTHESIS),
                     (:plan_sph_analysis, :ft_plan_sph_analysis, :ft_execute_sph_analysis, SPHEREANALYSIS),
                     (:plan_sphv_synthesis, :ft_plan_sphv_synthesis, :ft_execute_sphv_synthesis, SPHEREVSYNTHESIS),
@@ -484,6 +535,35 @@ function lmul!(p::FTPlan{Float64, 3, TETRAHEDRONANALYSIS}, x::Array{Float64, 3})
     return x
 end
 
+plan_spinsph_synthesis(x::Matrix{T}, s::Integer) where T = plan_spinsph_synthesis(T, size(x, 1), size(x, 2), s)
+
+function plan_spinsph_synthesis(::Type{Complex{Float64}}, n::Integer, m::Integer, s::Integer)
+    plan = ccall((:ft_plan_spinsph_synthesis, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), n, m, s)
+    return FTPlan{Complex{Float64}, 2, SPINSPHERESYNTHESIS}(plan, n, m)
+end
+
+function lmul!(p::FTPlan{Complex{Float64}, 2, SPINSPHERESYNTHESIS}, x::Matrix{Complex{Float64}})
+    if p.n != size(x, 1) || p.m != size(x, 2)
+        throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
+    end
+    ccall((:ft_execute_spinsph_synthesis, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), p, x, size(x, 1), size(x, 2))
+    return x
+end
+
+plan_spinsph_analysis(x::Matrix{T}, s::Integer) where T = plan_spinsph_analysis(T, size(x, 1), size(x, 2), s)
+
+function plan_spinsph_analysis(::Type{Complex{Float64}}, n::Integer, m::Integer, s::Integer)
+    plan = ccall((:ft_plan_spinsph_analysis, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), n, m, s)
+    return FTPlan{Complex{Float64}, 2, SPINSPHEREANALYSIS}(plan, n, m)
+end
+
+function lmul!(p::FTPlan{Complex{Float64}, 2, SPINSPHEREANALYSIS}, x::Matrix{Complex{Float64}})
+    if p.n != size(x, 1) || p.m != size(x, 2)
+        throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
+    end
+    ccall((:ft_execute_spinsph_analysis, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), p, x, size(x, 1), size(x, 2))
+    return x
+end
 
 *(p::FTPlan{T}, x::Array{T}) where T = lmul!(p, deepcopy(x))
 *(p::AdjointFTPlan{T}, x::Array{T}) where T = lmul!(p, deepcopy(x))
@@ -634,6 +714,18 @@ function ldiv!(p::FTPlan{Float64, 3, TETRAHEDRON}, x::Array{Float64, 3})
     return x
 end
 
+function lmul!(p::FTPlan{Complex{Float64}, 2, SPINSPHERE}, x::Matrix{Complex{Float64}})
+    checksize(p, x)
+    ccall((:ft_execute_spinsph2fourier, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Complex{Float64}}, Cint, Cint), p, x, size(x, 1), size(x, 2))
+    return x
+end
+
+function ldiv!(p::FTPlan{Complex{Float64}, 2, SPINSPHERE}, x::Matrix{Complex{Float64}})
+    checksize(p, x)
+    ccall((:ft_execute_fourier2spinsph, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Complex{Float64}}, Cint, Cint), p, x, size(x, 1), size(x, 2))
+    return x
+end
+
 *(p::FTPlan{T}, x::Array{Complex{T}}) where T = lmul!(p, deepcopy(x))
 *(p::AdjointFTPlan{T}, x::Array{Complex{T}}) where T = lmul!(p, deepcopy(x))
 *(p::TransposeFTPlan{T}, x::Array{Complex{T}}) where T = lmul!(p, deepcopy(x))
diff --git a/src/specialfunctions.jl b/src/specialfunctions.jl
index ca74344b..6ac37b73 100644
--- a/src/specialfunctions.jl
+++ b/src/specialfunctions.jl
@@ -605,3 +605,50 @@ function tetones(::Type{T}, l::Int, m::Int, n::Int) where T
 end
 
 tetzeros(::Type{T}, l::Int, m::Int, n::Int) where T = zeros(T, l, m, n)
+
+function spinsphrand(::Type{T}, m::Int, n::Int, s::Int) where T
+    A = zeros(T, m, n)
+    as = abs(s)
+    for i = 1:m-as
+        A[i,1] = rand(T)
+    end
+    for j = 1:n÷2
+        for i = 1:m-max(j, as)
+            A[i,2j] = rand(T)
+            A[i,2j+1] = rand(T)
+        end
+    end
+    A
+end
+
+function spinsphrandn(::Type{T}, m::Int, n::Int, s::Int) where T
+    A = zeros(T, m, n)
+    as = abs(s)
+    for i = 1:m-as
+        A[i,1] = randn(T)
+    end
+    for j = 1:n÷2
+        for i = 1:m-max(j, as)
+            A[i,2j] = randn(T)
+            A[i,2j+1] = randn(T)
+        end
+    end
+    A
+end
+
+function spinsphones(::Type{T}, m::Int, n::Int, s::Int) where T
+    A = zeros(T, m, n)
+    as = abs(s)
+    for i = 1:m-as
+        A[i,1] = one(T)
+    end
+    for j = 1:n÷2
+        for i = 1:m-max(j, as)
+            A[i,2j] = one(T)
+            A[i,2j+1] = one(T)
+        end
+    end
+    A
+end
+
+spinsphzeros(::Type{T}, m::Int, n::Int) where T = zeros(T, m, n)
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 6492a402..e055ed0b 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -4,6 +4,25 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
 
 @testset "libfasttransforms" begin
     n = 64
+    for T in (Float32, Float64)
+        c = one(T) ./ (1:n)
+        x = collect(-1 .+ 2*(0:n-1)/T(n))
+        f = zero(x)
+        FastTransforms.horner!(c, x, f)
+        fd = T[sum(c[k]*x^(k-1) for k in 1:length(c)) for x in x]
+        @test f ≈ fd
+        FastTransforms.clenshaw!(c, x, f)
+        fd = T[sum(c[k]*cos((k-1)*acos(x)) for k in 1:length(c)) for x in x]
+        @test f ≈ fd
+        A = T[(2k+one(T))/(k+one(T)) for k in 0:length(c)-1]
+        B = T[zero(T) for k in 0:length(c)-1]
+        C = T[k/(k+one(T)) for k in 0:length(c)]
+        phi0 = ones(T, length(x))
+        c = cheb2leg(c)
+        FastTransforms.clenshaw!(c, A, B, C, x, phi0, f)
+        @test f ≈ fd
+    end
+
     α, β, γ, δ, λ, μ = 0.1, 0.2, 0.3, 0.4, 0.5, 0.6
     function test_1d_plans(p1, p2, x; skip::Bool=false)
         y = p1*x
@@ -120,7 +139,7 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
     ps = plan_tri_synthesis(A)
     pa = plan_tri_analysis(A)
     test_nd_plans(p, ps, pa, A)
-
+    #=
     A = tetones(Float64, n, n, n)
     p = plan_tet2cheb(A, α, β, γ, δ)
     ps = plan_tet_synthesis(A)
@@ -131,4 +150,10 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
     ps = plan_tet_synthesis(A)
     pa = plan_tet_analysis(A)
     test_nd_plans(p, ps, pa, A)
+    =#
+    A = spinsphones(Complex{Float64}, n, 2n-1, 2) + im*spinsphones(Complex{Float64}, n, 2n-1, 2)
+    p = plan_spinsph2fourier(A, 2)
+    ps = plan_spinsph_synthesis(A, 2)
+    pa = plan_spinsph_analysis(A, 2)
+    test_nd_plans(p, ps, pa, A)
 end

From 80aa1ab6b9f81899f6c41bd4840da0c4200e75f8 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Fri, 1 May 2020 00:16:01 -0500
Subject: [PATCH 014/222] improve travis CI

---
 .travis.yml | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 144b82cb..d7efc6f0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,8 +1,10 @@
 # Documentation: http://docs.travis-ci.com/user/languages/julia/
 language: julia
 os:
+  - freebsd
   - linux
   - osx
+  - windows
 julia:
   - 1.3
   - 1.4
@@ -12,6 +14,16 @@ matrix:
     - julia: nightly
 notifications:
   email: false
-after_success:
-  - julia -e 'using Pkg; cd(Pkg.dir("FastTransforms")); Pkg.add("Documenter"); include(joinpath("docs", "make.jl"))'
-  - julia -e 'using Pkg; cd(Pkg.dir("FastTransforms")); Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
+codecov: true
+jobs:
+  include:
+    - stage: "Documentation"
+      julia: 1.3
+      os: linux
+      script:
+        - julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
+        - julia --project=docs/ docs/make.jl
+      after_success: skip
+cache:
+  directories:
+    - $HOME/.julia/artifacts

From 5ae4f8421ff2274d70d31aa942af12a2c369b7cc Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Fri, 1 May 2020 00:23:52 -0500
Subject: [PATCH 015/222] matrix and jobs are aliases

---
 .travis.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index d7efc6f0..7f8bc56a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,13 +9,12 @@ julia:
   - 1.3
   - 1.4
   - nightly
-matrix:
-  allow_failures:
-    - julia: nightly
 notifications:
   email: false
 codecov: true
 jobs:
+  allow_failures:
+    - julia: nightly
   include:
     - stage: "Documentation"
       julia: 1.3

From 5e7eaece368422c8f60fb3f9f173bf37be7461a8 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Fri, 1 May 2020 00:35:16 -0500
Subject: [PATCH 016/222] codecov: true unrecognized

---
 .travis.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 7f8bc56a..68262465 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,7 +11,8 @@ julia:
   - nightly
 notifications:
   email: false
-codecov: true
+after_success:
+  - julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
 jobs:
   allow_failures:
     - julia: nightly

From 44074a98694d04e08ac8eafc15e0e291c8329a73 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Fri, 1 May 2020 00:56:45 -0500
Subject: [PATCH 017/222] docs need a project toml

---
 docs/Project.toml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 docs/Project.toml

diff --git a/docs/Project.toml b/docs/Project.toml
new file mode 100644
index 00000000..f2a273e5
--- /dev/null
+++ b/docs/Project.toml
@@ -0,0 +1,5 @@
+[deps]
+Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+
+[compat]
+Documenter = "~0.24"

From fc78aad2b3018aa7dc916270e77db295672bb511 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Mon, 4 May 2020 22:01:47 -0500
Subject: [PATCH 018/222] remove tetrahedron bug, add AVX and FMA for
 spin-weighted spherical harmonics

fix build from source
---
 Project.toml                   | 6 +++---
 deps/build.jl                  | 3 ++-
 test/libfasttransformstests.jl | 4 ++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/Project.toml b/Project.toml
index a01378a2..88e03e3f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.9.0"
+version = "0.9.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -18,11 +18,11 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "0.4, 0.5"
-BinaryProvider = "0.5.8"
+BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.3.0"
+FastTransforms_jll = "0.3.1"
 Reexport = "0.2"
 SpecialFunctions = "0.8, 0.9, 0.10"
 ToeplitzMatrices = "0.6"
diff --git a/deps/build.jl b/deps/build.jl
index 8300b133..700d6147 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -1,7 +1,7 @@
 using BinaryProvider
 import Libdl
 
-version = v"0.2.13"
+version = v"0.3.1"
 
 if arch(platform_key_abi()) != :x86_64
     @warn "FastTransforms has only been tested on x86_64 architectures."
@@ -37,6 +37,7 @@ if ft_build_from_source == "true"
             git clone -b v$version https://github.com/MikaelSlevinsky/FastTransforms.git FastTransforms
         fi
         cd FastTransforms
+        $make assembly $compiler
         $make lib $compiler $flags
         cd ..
         mv -f FastTransforms/libfasttransforms.$extension libfasttransforms.$extension
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index e055ed0b..a1dea531 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -139,7 +139,7 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
     ps = plan_tri_synthesis(A)
     pa = plan_tri_analysis(A)
     test_nd_plans(p, ps, pa, A)
-    #=
+
     A = tetones(Float64, n, n, n)
     p = plan_tet2cheb(A, α, β, γ, δ)
     ps = plan_tet_synthesis(A)
@@ -150,7 +150,7 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
     ps = plan_tet_synthesis(A)
     pa = plan_tet_analysis(A)
     test_nd_plans(p, ps, pa, A)
-    =#
+
     A = spinsphones(Complex{Float64}, n, 2n-1, 2) + im*spinsphones(Complex{Float64}, n, 2n-1, 2)
     p = plan_spinsph2fourier(A, 2)
     ps = plan_spinsph_synthesis(A, 2)

From 3b70c23217c9dcd7e6e433017ddc3fb5b763e4f9 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Fri, 15 May 2020 11:29:26 -0500
Subject: [PATCH 019/222] cleanup C - Julia BigFloat interop (#104)

* cleanup C - Julia BigFloat interop

* rework help text
---
 Project.toml                   |  4 +--
 deps/build.jl                  |  2 +-
 src/FastTransforms.jl          |  1 -
 src/libfasttransforms.jl       | 58 ++++++++++++++--------------------
 test/libfasttransformstests.jl | 36 +++++++--------------
 5 files changed, 38 insertions(+), 63 deletions(-)

diff --git a/Project.toml b/Project.toml
index 88e03e3f..ebe98dd2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.9.1"
+version = "0.9.2"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -22,7 +22,7 @@ BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.3.1"
+FastTransforms_jll = "0.3.2"
 Reexport = "0.2"
 SpecialFunctions = "0.8, 0.9, 0.10"
 ToeplitzMatrices = "0.6"
diff --git a/deps/build.jl b/deps/build.jl
index 700d6147..5c6062be 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -1,7 +1,7 @@
 using BinaryProvider
 import Libdl
 
-version = v"0.3.1"
+version = v"0.3.2"
 
 if arch(platform_key_abi()) != :x86_64
     @warn "FastTransforms has only been tested on x86_64 architectures."
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index f3ea069f..89ca76f4 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -12,7 +12,6 @@ import Base: unsafe_convert, eltype, ndims, adjoint, transpose, show, *, \,
              inv, size, view
 
 import Base.GMP: Limb
-import Base.MPFR: BigFloat, _BigFloat
 
 import AbstractFFTs: Plan, ScaledPlan,
                      fft, ifft, bfft, fft!, ifft!, bfft!,
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 4cbff2ac..d3ff885a 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -32,13 +32,21 @@ struct mpfr_t <: AbstractFloat
     d::Ptr{Limb}
 end
 
-mpfr_t(x::BigFloat) = mpfr_t(x.prec, x.sign, x.exp, x.d)
+"""
+`BigFloat` is a mutable struct and there is no guarantee that each entry in
+an `Array{BigFloat}` has unique pointers. For example, looking at the `Limb`s,
+
+    Id = Matrix{BigFloat}(I, 3, 3)
+    map(x->x.d, Id)
 
-function BigFloat(x::mpfr_t)
-    nb = ccall((:mpfr_custom_get_size,:libmpfr), Csize_t, (Clong,), precision(BigFloat))
-    nb = (nb + Core.sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this
-    str = unsafe_string(Ptr{UInt8}(x.d), nb * Core.sizeof(Limb))
-    _BigFloat(x.prec, x.sign, x.exp, str)
+shows that the ones and the zeros all share the same pointers. If a C function
+assumes that each datum is unique, then every entry must be renewed with a `deepcopy`.
+"""
+function renew!(x::Array{BigFloat})
+    for i in eachindex(x)
+        @inbounds x[i] = deepcopy(x[i])
+    end
+    return x
 end
 
 set_num_threads(n::Integer) = ccall((:ft_set_num_threads, libfasttransforms), Cvoid, (Cint, ), n)
@@ -602,31 +610,22 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bfmvf, :Float32),
     end
 end
 
-for (fJ, fC) in ((:lmul!, :ft_mpfr_trmv),
-                 (:ldiv!, :ft_mpfr_trsv))
+for (fJ, fC) in ((:lmul!, :ft_mpfr_trmv_ptr),
+                 (:ldiv!, :ft_mpfr_trsv_ptr))
     @eval begin
         function $fJ(p::FTPlan{BigFloat, 1}, x::Vector{BigFloat})
             checksize(p, x)
-            xt = deepcopy.(x)
-            xc = mpfr_t.(xt)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{mpfr_t}, Int32), 'N', p.n, p, p.n, xc, Base.MPFR.ROUNDING_MODE[])
-            x .= BigFloat.(xc)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Int32), 'N', p.n, p, p.n, renew!(x), Base.MPFR.ROUNDING_MODE[])
             return x
         end
         function $fJ(p::AdjointFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::Vector{BigFloat}) where K
             checksize(p, x)
-            xt = deepcopy.(x)
-            xc = mpfr_t.(xt)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{mpfr_t}, Int32), 'T', p.parent.n, p, p.parent.n, xc, Base.MPFR.ROUNDING_MODE[])
-            x .= BigFloat.(xc)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), Base.MPFR.ROUNDING_MODE[])
             return x
         end
         function $fJ(p::TransposeFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::Vector{BigFloat}) where K
             checksize(p, x)
-            xt = deepcopy.(x)
-            xc = mpfr_t.(xt)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{mpfr_t}, Int32), 'T', p.parent.n, p, p.parent.n, xc, Base.MPFR.ROUNDING_MODE[])
-            x .= BigFloat.(xc)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), Base.MPFR.ROUNDING_MODE[])
             return x
         end
     end
@@ -655,31 +654,22 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bfmmf, :Float32),
     end
 end
 
-for (fJ, fC) in ((:lmul!, :ft_mpfr_trmm),
-                 (:ldiv!, :ft_mpfr_trsm))
+for (fJ, fC) in ((:lmul!, :ft_mpfr_trmm_ptr),
+                 (:ldiv!, :ft_mpfr_trsm_ptr))
     @eval begin
         function $fJ(p::FTPlan{BigFloat, 1}, x::Matrix{BigFloat})
             checksize(p, x)
-            xt = deepcopy.(x)
-            xc = mpfr_t.(xt)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{mpfr_t}, Cint, Cint, Int32), 'N', p.n, p, p.n, xc, size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
-            x .= BigFloat.(xc)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'N', p.n, p, p.n, renew!(x), size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
             return x
         end
         function $fJ(p::AdjointFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::Matrix{BigFloat}) where K
             checksize(p, x)
-            xt = deepcopy.(x)
-            xc = mpfr_t.(xt)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{mpfr_t}, Cint, Cint, Int32), 'T', p.parent.n, p, p.parent.n, xc, size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
-            x .= BigFloat.(xc)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
             return x
         end
         function $fJ(p::TransposeFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::Matrix{BigFloat}) where K
             checksize(p, x)
-            xt = deepcopy.(x)
-            xc = mpfr_t.(xt)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{mpfr_t}, Cint, Cint, Int32), 'T', p.parent.n, p, p.parent.n, xc, size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
-            x .= BigFloat.(xc)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
             return x
         end
     end
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index a1dea531..9d04c704 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -24,7 +24,7 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
     end
 
     α, β, γ, δ, λ, μ = 0.1, 0.2, 0.3, 0.4, 0.5, 0.6
-    function test_1d_plans(p1, p2, x; skip::Bool=false)
+    function test_1d_plans(p1, p2, x)
         y = p1*x
         z = p2*y
         @test z ≈ x
@@ -44,51 +44,37 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         @test z ≈ x
         P = p1*I
         Q = p2*P
-        skip ? (@test_skip Q ≈ I) : (@test Q ≈ I)
+        @test Q ≈ I
         P = p1*I
         Q = p1'P
         P = transpose(p1)*Q
         Q = transpose(p1)\P
         P = p1'\Q
         Q = p1\P
-        skip ? (@test_skip Q ≈ I) : (@test Q ≈ I)
+        @test Q ≈ I
         P = p2*I
         Q = p2'P
         P = transpose(p2)*Q
         Q = transpose(p2)\P
         P = p2'\Q
         Q = p2\P
-        skip ? (@test_skip Q ≈ I) : (@test Q ≈ I)
+        @test Q ≈ I
     end
 
-    for T in (Float32, Float64, Complex{Float32}, Complex{Float64})
+    for T in (Float32, Float64, Complex{Float32}, Complex{Float64}, BigFloat, Complex{BigFloat})
         x = T(1)./(1:n)
         Id = Matrix{T}(I, n, n)
         for (p1, p2) in ((plan_leg2cheb(Id), plan_cheb2leg(Id)),
-                          (plan_ultra2ultra(Id, λ, μ), plan_ultra2ultra(Id, μ, λ)),
-                          (plan_jac2jac(Id, α, β, γ, δ), plan_jac2jac(Id, γ, δ, α, β)),
-                          (plan_lag2lag(Id, α, β), plan_lag2lag(Id, β, α)),
-                          (plan_jac2ultra(Id, α, β, λ), plan_ultra2jac(Id, λ, α, β)),
-                          (plan_jac2cheb(Id, α, β), plan_cheb2jac(Id, α, β)),
-                          (plan_ultra2cheb(Id, λ), plan_cheb2ultra(Id, λ)))
+                         (plan_ultra2ultra(Id, λ, μ), plan_ultra2ultra(Id, μ, λ)),
+                         (plan_jac2jac(Id, α, β, γ, δ), plan_jac2jac(Id, γ, δ, α, β)),
+                         (plan_lag2lag(Id, α, β), plan_lag2lag(Id, β, α)),
+                         (plan_jac2ultra(Id, α, β, λ), plan_ultra2jac(Id, λ, α, β)),
+                         (plan_jac2cheb(Id, α, β), plan_cheb2jac(Id, α, β)),
+                         (plan_ultra2cheb(Id, λ), plan_cheb2ultra(Id, λ)))
             test_1d_plans(p1, p2, x)
         end
     end
 
-    for T in (BigFloat, Complex{BigFloat})
-        x = T(1)./(1:n)
-        Id = Matrix{T}(I, n, n)
-        for (p1, p2) in ((plan_leg2cheb(Id), plan_cheb2leg(Id)),
-                          (plan_ultra2ultra(Id, λ, μ), plan_ultra2ultra(Id, μ, λ)),
-                          (plan_jac2jac(Id, α, β, γ, δ), plan_jac2jac(Id, γ, δ, α, β)),
-                          (plan_lag2lag(Id, α, β), plan_lag2lag(Id, β, α)),
-                          (plan_jac2ultra(Id, α, β, λ), plan_ultra2jac(Id, λ, α, β)),
-                          (plan_jac2cheb(Id, α, β), plan_cheb2jac(Id, α, β)),
-                          (plan_ultra2cheb(Id, λ), plan_cheb2ultra(Id, λ)))
-            test_1d_plans(p1, p2, x; skip=true)
-        end
-    end
-
     function test_nd_plans(p, ps, pa, A)
         B = copy(A)
         C = ps*(p*A)

From 8d6e163271b4d8db0dcb57116a54587f9e30c36e Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Sat, 30 May 2020 19:55:29 -0500
Subject: [PATCH 020/222] Migrate to GitHub actions (#105)

* trial ci.yml

* add CI badge

* try x86

* separate macos

* fix macos

* remove macos from matrix

* expand macos versions

* switch order

* serialize

* use exclude instead

* remove appveyor and cirrus, pare down travis

remove badges from readme

* show version info
---
 .appveyor.yml            | 43 ---------------------------
 .cirrus.yml              | 18 ------------
 .github/workflows/ci.yml | 63 ++++++++++++++++++++++++++++++++++++++++
 .travis.yml              | 13 ---------
 README.md                |  2 +-
 5 files changed, 64 insertions(+), 75 deletions(-)
 delete mode 100644 .appveyor.yml
 delete mode 100644 .cirrus.yml
 create mode 100644 .github/workflows/ci.yml

diff --git a/.appveyor.yml b/.appveyor.yml
deleted file mode 100644
index 06bb417a..00000000
--- a/.appveyor.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-environment:
-  matrix:
-  - julia_version: 1.3
-  - julia_version: 1.4
-  - julia_version: nightly
-
-platform:
-  - x86 # 32-bit
-  - x64 # 64-bit
-
-# # Uncomment the following lines to allow failures on nightly julia
-# # (tests will run but not make your overall status red)
-matrix:
-  allow_failures:
-    - julia_version: nightly
-
-branches:
-  only:
-    - master
-    - /release-.*/
-
-notifications:
-  - provider: Email
-    on_build_success: false
-    on_build_failure: false
-    on_build_status_changed: false
-
-install:
-  - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1"))
-
-build_script:
-  - echo "%JL_BUILD_SCRIPT%"
-  - C:\julia\bin\julia -e "%JL_BUILD_SCRIPT%"
-
-test_script:
-  - echo "%JL_TEST_SCRIPT%"
-  - C:\julia\bin\julia -e "%JL_TEST_SCRIPT%"
-
-# # Uncomment to support code coverage upload. Should only be enabled for packages
-# # which would have coverage gaps without running on Windows
-# on_success:
-#   - echo "%JL_CODECOV_SCRIPT%"
-#   - C:\julia\bin\julia -e "%JL_CODECOV_SCRIPT%"
diff --git a/.cirrus.yml b/.cirrus.yml
deleted file mode 100644
index d7653148..00000000
--- a/.cirrus.yml
+++ /dev/null
@@ -1,18 +0,0 @@
-freebsd_instance:
-  image: freebsd-12-1-release-amd64
-task:
-  name: FreeBSD
-  env:
-    matrix:
-      - JULIA_VERSION: 1.3
-      - JULIA_VERSION: 1.4
-      - JULIA_VERSION: nightly
-  allow_failures: $JULIA_VERSION == 'nightly'
-  install_script:
-    - sh -c "$(fetch https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh -o -)"
-  build_script:
-    - cirrusjl build
-  test_script:
-    - cirrusjl test
-  coverage_script:
-    - cirrusjl coverage codecov coveralls
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 00000000..fa4a1153
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,63 @@
+name: CI
+on:
+  - push
+  - pull_request
+jobs:
+  test:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        version:
+          - '1.3'
+          - '1.4'
+          - 'nightly'
+        os:
+          - ubuntu-latest
+          - macOS-latest
+          - windows-latest
+        arch:
+          - x86
+          - x64
+        exclude:
+          - os: macOS-latest
+            arch: x86
+    steps:
+      - uses: actions/checkout@v2
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+          show-versioninfo: true
+      - uses: actions/cache@v1
+        env:
+          cache-name: cache-artifacts
+        with:
+          path: ~/.julia/artifacts
+          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
+          restore-keys: |
+            ${{ runner.os }}-test-${{ env.cache-name }}-
+            ${{ runner.os }}-test-
+            ${{ runner.os }}-
+      - uses: julia-actions/julia-buildpkg@latest
+      - uses: julia-actions/julia-runtest@latest
+      - uses: julia-actions/julia-uploadcodecov@latest
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+  docs:
+    name: Documentation
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: '1.3'
+      - run: |
+          julia --project=docs -e '
+            using Pkg
+            Pkg.develop(PackageSpec(path=pwd()))
+            Pkg.instantiate()'
+      - run: julia --project=docs docs/make.jl
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/.travis.yml b/.travis.yml
index 68262465..f60485ef 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,28 +2,15 @@
 language: julia
 os:
   - freebsd
-  - linux
-  - osx
-  - windows
 julia:
   - 1.3
   - 1.4
   - nightly
 notifications:
   email: false
-after_success:
-  - julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(Codecov.process_folder())'
 jobs:
   allow_failures:
     - julia: nightly
-  include:
-    - stage: "Documentation"
-      julia: 1.3
-      os: linux
-      script:
-        - julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()'
-        - julia --project=docs/ docs/make.jl
-      after_success: skip
 cache:
   directories:
     - $HOME/.julia/artifacts
diff --git a/README.md b/README.md
index e746e1b0..9540c6a9 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # FastTransforms.jl
 
-[![Travis](https://travis-ci.org/JuliaApproximation/FastTransforms.jl.svg?branch=master)](https://travis-ci.org/JuliaApproximation/FastTransforms.jl) [![AppVeyor](https://ci.appveyor.com/api/projects/status/oba9qush15q3x8pb/branch/master?svg=true)](https://ci.appveyor.com/project/MikaelSlevinsky/fasttransforms-jl/branch/master) [![Cirrus](https://api.cirrus-ci.com/github/JuliaApproximation/FastTransforms.jl.svg)](https://cirrus-ci.com/github/JuliaApproximation/FastTransforms.jl) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-latest-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/latest)
+[![Build Status](https://github.com/JuliaApproximation/FastTransforms.jl/workflows/CI/badge.svg)](https://github.com/JuliaApproximation/FastTransforms.jl/actions?query=workflow%3ACI) [![Travis](https://travis-ci.org/JuliaApproximation/FastTransforms.jl.svg?branch=master)](https://travis-ci.org/JuliaApproximation/FastTransforms.jl) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-latest-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/latest)
 
 `FastTransforms.jl` allows the user to conveniently work with orthogonal polynomials with degrees well into the millions.
 

From f89dbb39985c0ec86efee7c5323e863f3f02ed95 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Thu, 16 Jul 2020 15:31:13 +0100
Subject: [PATCH 021/222] Generalise Clenshaw (#112)

* Generalise clenshaw for other array types

* Support general Clenshaw

* Add forwardrecurrence!

* Turn on codecov

* fix tests

* Match libfasttransforms in Clenshaw

* Add ChebyshevU special case

* use propagate_inbounds
---
 .gitignore                     |   1 +
 Project.toml                   |   2 +
 src/FastTransforms.jl          |   6 +-
 src/clenshaw.jl                | 152 +++++++++++++++++++++++++++++++++
 src/libfasttransforms.jl       |  34 ++++++--
 test/clenshawtests.jl          | 106 +++++++++++++++++++++++
 test/libfasttransformstests.jl |   8 +-
 test/runtests.jl               |  10 +--
 8 files changed, 297 insertions(+), 22 deletions(-)
 create mode 100644 src/clenshaw.jl
 create mode 100644 test/clenshawtests.jl

diff --git a/.gitignore b/.gitignore
index 2a0e93eb..77b0d9d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ deps/build.log
 deps/libfasttransforms.*
 .DS_Store
 deps/FastTransforms/
+Manifest.toml
diff --git a/Project.toml b/Project.toml
index ebe98dd2..932b1e66 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,6 +9,7 @@ DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
 FastTransforms_jll = "34b6f7d7-08f9-5794-9e10-3819e4c7e49a"
+FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
@@ -23,6 +24,7 @@ DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.3.2"
+FillArrays = "0.8"
 Reexport = "0.2"
 SpecialFunctions = "0.8, 0.9, 0.10"
 ToeplitzMatrices = "0.6"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 89ca76f4..bf4bde4d 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,7 +1,7 @@
 module FastTransforms
 
 using FastGaussQuadrature, LinearAlgebra
-using Reexport, SpecialFunctions, ToeplitzMatrices
+using Reexport, SpecialFunctions, ToeplitzMatrices, FillArrays
 
 import DSP
 
@@ -28,6 +28,8 @@ import FFTW: dct, dct!, idct, idct!, plan_dct!, plan_idct!,
 
 import FastGaussQuadrature: unweightedgausshermite
 
+import FillArrays: AbstractFill, getindex_value
+
 import LinearAlgebra: mul!, lmul!, ldiv!
 
 export leg2cheb, cheb2leg, ultra2ultra, jac2jac,
@@ -94,4 +96,6 @@ lgamma(x) = logabsgamma(x)[1]
 
 include("specialfunctions.jl")
 
+include("clenshaw.jl")
+
 end # module
diff --git a/src/clenshaw.jl b/src/clenshaw.jl
new file mode 100644
index 00000000..619c71ae
--- /dev/null
+++ b/src/clenshaw.jl
@@ -0,0 +1,152 @@
+"""
+    forwardrecurrence!(v, A, B, C, x)
+
+evaluates the first `length(v)` orthogonal polynomials at `x`,
+where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
+as defined in DLMF,
+overwriting `v` with the results.
+"""
+function forwardrecurrence!(v::AbstractVector{T}, A::AbstractVector, B::AbstractVector, C::AbstractVector, x) where T
+    N = length(v)
+    N == 0 && return v
+    length(A)+1 ≥ N && length(B)+1 ≥ N && length(C)+1 ≥ N || throw(ArgumentError("A, B, C must contain at least $(N-1) entries"))
+    p0 = one(T) # assume OPs are normalized to one for now
+    p1 = convert(T, N == 1 ? p0 : A[1]x + B[1]) # avoid accessing A[1]/B[1] if empty
+    _forwardrecurrence!(v, A, B, C, x, p0, p1)
+end
+
+
+Base.@propagate_inbounds _forwardrecurrence_next(n, A, B, C, x, p0, p1) = muladd(muladd(A[n],x,B[n]), p1, -C[n]*p0)
+# special case for B[n] == 0
+Base.@propagate_inbounds _forwardrecurrence_next(n, A, ::Zeros, C, x, p0, p1) = muladd(A[n]*x, p1, -C[n]*p0)
+# special case for Chebyshev U
+Base.@propagate_inbounds _forwardrecurrence_next(n, A::AbstractFill, ::Zeros, C::Ones, x, p0, p1) = muladd(getindex_value(A)*x, p1, -p0)
+
+
+# this supports adaptivity: we can populate `v` for large `n`
+function _forwardrecurrence!(v::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x, p0, p1)
+    N = length(v)
+    N == 0 && return v
+    v[1] = p0
+    N == 1 && return v
+    v[2] = p1
+    @inbounds for n = 2:N-1
+        p1,p0 = _forwardrecurrence_next(n, A, B, C, x, p0, p1),p1
+        v[n+1] = p1
+    end
+    v
+end
+
+
+
+forwardrecurrence(N::Integer, A::AbstractVector, B::AbstractVector, C::AbstractVector, x) =
+    forwardrecurrence!(Vector{promote_type(eltype(A),eltype(B),eltype(C),typeof(x))}(undef, N), A, B, C, x)
+
+
+"""
+    clenshaw!(c, A, B, C, x)
+
+evaluates the orthogonal polynomial expansion with coefficients `c` at points `x`,
+where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
+as defined in DLMF,
+overwriting `x` with the results.
+"""
+clenshaw!(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) = 
+    clenshaw!(c, A, B, C, x, Ones{eltype(x)}(length(x)), x)
+
+
+"""
+    clenshaw!(c, A, B, C, x, ϕ₀, f)
+
+evaluates the orthogonal polynomial expansion with coefficients `c` at points `x`,
+where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
+as defined in DLMF and `ϕ₀` multiplies the result pointwise (presumably the zeroth polynomial at each point),
+overwriting `f` with the results.
+"""
+function clenshaw!(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector, ϕ₀::AbstractVector, f::AbstractVector)
+    f .= ϕ₀ .* clenshaw.(Ref(c), Ref(A), Ref(B), Ref(C), x)
+end
+
+
+@inline _clenshaw_next(n, A, B, C, x, c, bn1, bn2) = muladd(muladd(A[n],x,B[n]), bn1, muladd(-C[n+1],bn2,c[n]))
+@inline _clenshaw_next(n, A, ::Zeros, C, x, c, bn1, bn2) = muladd(A[n]*x, bn1, muladd(-C[n+1],bn2,c[n]))
+# Chebyshev U
+@inline _clenshaw_next(n, A::AbstractFill, ::Zeros, C::Ones, x, c, bn1, bn2) = muladd(getindex_value(A)*x, bn1, -bn2+c[n])
+
+"""
+    clenshaw(c, A, B, C, x)
+
+evaluates the orthogonal polynomial expansion with coefficients `c` at points `x`,
+where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
+as defined in DLMF.
+`A` and `B` must have at least `length(c)` entries and `C` at least `length(c)+1`.
+`x` may also be a single `Number`.
+"""
+function clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number)
+    N = length(c)
+    T = promote_type(eltype(c),eltype(A),eltype(B),eltype(C),typeof(x))
+    @boundscheck check_clenshaw_recurrences(N, A, B, C)
+    N == 0 && return zero(T) # empty expansion evaluates to zero
+    @inbounds begin
+        bn2 = zero(T)
+        bn1 = convert(T,c[N])
+        for n = N-1:-1:1 # backward recurrence from the highest coefficient down
+            bn1,bn2 = _clenshaw_next(n, A, B, C, x, c, bn1, bn2),bn1
+        end
+    end
+    bn1
+end
+
+
+clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) = 
+    clenshaw!(c, A, B, C, copy(x))
+
+###
+# Chebyshev T special cases
+###
+
+"""
+    clenshaw!(c, x)
+
+evaluates the first-kind Chebyshev (T) expansion with coefficients `c` at points `x`,
+overwriting `x` with the results.
+"""
+clenshaw!(c::AbstractVector, x::AbstractVector) = clenshaw!(c, x, x)
+
+
+"""
+    clenshaw!(c, x, f)
+
+evaluates the first-kind Chebyshev (T) expansion with coefficients `c` at points `x`,
+overwriting `f` with the results.
+"""
+function clenshaw!(c::AbstractVector, x::AbstractVector, f::AbstractVector)
+    f .= clenshaw.(Ref(c), x)
+end
+
+"""
+    clenshaw(c, x)
+
+evaluates the first-kind Chebyshev (T) expansion with coefficients `c` at the points `x`.
+`x` may also be a single `Number`.
+"""
+function clenshaw(c::AbstractVector, x::Number)
+    N,T = length(c),promote_type(eltype(c),typeof(x))
+    if N == 0
+        return zero(T)
+    elseif N == 1 # avoid issues with NaN x
+        return first(c)*one(x)
+    end
+
+    y = 2x # the recurrence below multiplies by 2x at every step except the last
+    bk1,bk2 = zero(T),zero(T)
+    @inbounds begin
+        for k = N:-1:2 # run from the highest coefficient down to c[2]
+            bk1,bk2 = muladd(y,bk1,c[k]-bk2),bk1
+        end
+        muladd(x,bk1,c[1]-bk2) # final step uses x rather than 2x; this value is returned
+    end
+end
+
+clenshaw(c::AbstractVector, x::AbstractVector) = clenshaw!(c, copy(x))
+
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index d3ff885a..a1efba39 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -54,33 +54,51 @@ set_num_threads(n::Integer) = ccall((:ft_set_num_threads, libfasttransforms), Cv
 function horner!(c::Vector{Float64}, x::Vector{Float64}, f::Vector{Float64})
     @assert length(x) == length(f)
     ccall((:ft_horner, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, 1, length(x), x, f)
+    f
 end
 
 function horner!(c::Vector{Float32}, x::Vector{Float32}, f::Vector{Float32})
     @assert length(x) == length(f)
     ccall((:ft_hornerf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, 1, length(x), x, f)
+    f
+end
+
+function check_clenshaw_recurrences(N, A, B, C) # validate coefficient lengths for an N-term expansion
+    if length(A) < N || length(B) < N || length(C) < N+1
+        throw(ArgumentError("A, B must contain at least $N entries and C must contain at least $(N+1) entries"))
+    end
+end
+
+function check_clenshaw_points(x, ϕ₀, f)
+    length(x) == length(ϕ₀) == length(f) || throw(ArgumentError("Dimensions must match"))
 end
 
 function clenshaw!(c::Vector{Float64}, x::Vector{Float64}, f::Vector{Float64})
     @assert length(x) == length(f)
     ccall((:ft_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, 1, length(x), x, f)
+    f
 end
 
 function clenshaw!(c::Vector{Float32}, x::Vector{Float32}, f::Vector{Float32})
     @assert length(x) == length(f)
     ccall((:ft_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, 1, length(x), x, f)
+    f
 end
 
-function clenshaw!(c::Vector{Float64}, A::Vector{Float64}, B::Vector{Float64}, C::Vector{Float64}, x::Vector{Float64}, phi0::Vector{Float64}, f::Vector{Float64})
-    @assert length(c) == length(A) == length(B) == length(C)-1
-    @assert length(x) == length(phi0) == length(f)
-    ccall((:ft_orthogonal_polynomial_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Cint, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}), length(c), c, 1, A, B, C, length(x), x, phi0, f)
+function clenshaw!(c::Vector{Float64}, A::Vector{Float64}, B::Vector{Float64}, C::Vector{Float64}, x::Vector{Float64}, ϕ₀::Vector{Float64}, f::Vector{Float64})
+    N = length(c)
+    @boundscheck check_clenshaw_recurrences(N, A, B, C)
+    @boundscheck check_clenshaw_points(x, ϕ₀, f)
+    ccall((:ft_orthogonal_polynomial_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Cint, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}), N, c, 1, A, B, C, length(x), x, ϕ₀, f)
+    f
 end
 
-function clenshaw!(c::Vector{Float32}, A::Vector{Float32}, B::Vector{Float32}, C::Vector{Float32}, x::Vector{Float32}, phi0::Vector{Float32}, f::Vector{Float32})
-    @assert length(c) == length(A) == length(B) == length(C)-1
-    @assert length(x) == length(phi0) == length(f)
-    ccall((:ft_orthogonal_polynomial_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cint, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}), length(c), c, 1, A, B, C, length(x), x, phi0, f)
+function clenshaw!(c::Vector{Float32}, A::Vector{Float32}, B::Vector{Float32}, C::Vector{Float32}, x::Vector{Float32}, ϕ₀::Vector{Float32}, f::Vector{Float32})
+    N = length(c)
+    @boundscheck check_clenshaw_recurrences(N, A, B, C)
+    @boundscheck check_clenshaw_points(x, ϕ₀, f)
+    ccall((:ft_orthogonal_polynomial_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cint, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}), N, c, 1, A, B, C, length(x), x, ϕ₀, f)
+    f
 end
 
 const LEG2CHEB              = 0
diff --git a/test/clenshawtests.jl b/test/clenshawtests.jl
new file mode 100644
index 00000000..8952ad39
--- /dev/null
+++ b/test/clenshawtests.jl
@@ -0,0 +1,106 @@
+using FastTransforms, FillArrays, Test
+import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrence
+
+@testset "clenshaw" begin
+    @testset "Chebyshev T" begin
+        c = [1,2,3]
+        cf = float(c)
+        @test @inferred(clenshaw(c,1)) ≡ 1 + 2 + 3
+        @test @inferred(clenshaw(c,0)) ≡ 1 + 0 - 3
+        @test @inferred(clenshaw(c,0.1)) == 1 + 2*0.1 + 3*cos(2acos(0.1))
+        @test @inferred(clenshaw(c,[-1,0,1])) == clenshaw!(c,[-1,0,1]) == [2,-2,6]
+        @test clenshaw(c,[-1,0,1]) isa Vector{Int}
+        @test @inferred(clenshaw(Float64[],1)) ≡ 0.0
+
+        x = [1,0,0.1]
+        @test @inferred(clenshaw(c,x)) ≈ @inferred(clenshaw!(c,copy(x))) ≈ 
+            @inferred(clenshaw!(c,x,similar(x))) ≈
+            @inferred(clenshaw(cf,x)) ≈ @inferred(clenshaw!(cf,copy(x))) ≈ 
+            @inferred(clenshaw!(cf,x,similar(x))) ≈ [6,-2,-1.74]
+    end
+
+    @testset "Chebyshev U" begin
+        N = 5
+        A, B, C = Fill(2,N-1), Zeros{Int}(N-1), Ones{Int}(N)
+        @testset "forwardrecurrence!" begin
+            @test @inferred(forwardrecurrence(N, A, B, C, 1)) == @inferred(forwardrecurrence!(Vector{Int}(undef,N), A, B, C, 1)) == 1:N
+            @test forwardrecurrence!(Vector{Int}(undef,N), A, B, C, -1) == (-1) .^ (0:N-1) .* (1:N)
+            @test forwardrecurrence(N, A, B, C, 0.1) ≈ forwardrecurrence!(Vector{Float64}(undef,N), A, B, C, 0.1) ≈ 
+                    sin.((1:N) .* acos(0.1)) ./ sqrt(1-0.1^2)
+        end
+
+        c = [1,2,3]
+        @test c'forwardrecurrence(3, A, B, C, 0.1) ≈ clenshaw([1,2,3], A, B, C, 0.1) ≈ 
+            1 + (2sin(2acos(0.1)) + 3sin(3acos(0.1)))/sqrt(1-0.1^2)
+    end
+
+    @testset "Chebyshev-as-general" begin
+        @testset "forwardrecurrence!" begin
+            N = 5
+            A, B, C = [1; fill(2,N-2)], fill(0,N-1), fill(1,N)
+            Af, Bf, Cf = float(A), float(B), float(C)
+            @test forwardrecurrence(N, A, B, C, 1) == forwardrecurrence!(Vector{Int}(undef,N), A, B, C, 1) == ones(Int,N)
+            @test forwardrecurrence!(Vector{Int}(undef,N), A, B, C, -1) == (-1) .^ (0:N-1)
+            @test forwardrecurrence(N, A, B, C, 0.1) ≈ forwardrecurrence!(Vector{Float64}(undef,N), A, B, C, 0.1) ≈ cos.((0:N-1) .* acos(0.1))
+        end
+
+        c, A, B, C = [1,2,3], [1,2,2], fill(0,3), fill(1,4)
+        cf, Af, Bf, Cf = float(c), float(A), float(B), float(C)
+        @test @inferred(clenshaw(c, A, B, C, 1)) ≡ 6
+        @test @inferred(clenshaw(c, A, B, C, 0.1)) ≡ -1.74
+        @test @inferred(clenshaw([1,2,3], A, B, C, [-1,0,1])) == clenshaw!([1,2,3],A, B, C, [-1,0,1]) == [2,-2,6]
+        @test clenshaw(c, A, B, C, [-1,0,1]) isa Vector{Int}
+        @test @inferred(clenshaw(Float64[], A, B, C, 1)) ≡ 0.0
+
+        x = [1,0,0.1]
+        @test @inferred(clenshaw(c, A, B, C, x)) ≈ @inferred(clenshaw!(c, A, B, C, copy(x))) ≈ 
+            @inferred(clenshaw!(c, A, B, C, x, one.(x), similar(x))) ≈
+            @inferred(clenshaw!(cf, Af, Bf, Cf, x, one.(x),similar(x))) ≈
+            @inferred(clenshaw([1.,2,3], A, B, C, x)) ≈ 
+            @inferred(clenshaw!([1.,2,3], A, B, C, copy(x))) ≈ [6,-2,-1.74]
+    end
+
+    @testset "Legendre" begin
+        @testset "Float64" begin
+            N = 5
+            n = 0:N-1
+            A = (2n .+ 1) ./ (n .+ 1)
+            B = zeros(N)
+            C = n ./ (n .+ 1)
+            v_1 = forwardrecurrence(N, A, B, C, 1)
+            v_f = forwardrecurrence(N, A, B, C, 0.1)
+            @test v_1 ≈ ones(N)
+            @test forwardrecurrence(N, A, B, C, -1) ≈ (-1) .^ (0:N-1)
+            @test v_f ≈ [1,0.1,-0.485,-0.1475,0.3379375]
+
+            n = 0:N # need extra entry for C in Clenshaw
+            C = n ./ (n .+ 1)
+            for j = 1:N
+                c = [zeros(j-1); 1]
+                @test clenshaw(c, A, B, C, 1) ≈ v_1[j] # Julia code
+                @test clenshaw(c, A, B, C, 0.1) ≈  v_f[j] # Julia code
+                @test clenshaw!(c, A, B, C, [1.0,0.1], [1.0,1.0], [0.0,0.0])  ≈ [v_1[j],v_f[j]] # libfasttransforms
+            end
+        end
+
+        @testset "BigFloat" begin
+            N = 5
+            n = BigFloat(0):N-1
+            A = (2n .+ 1) ./ (n .+ 1)
+            B = zeros(N)
+            C = n ./ (n .+ 1)
+            @test forwardrecurrence(N, A, B, C, parse(BigFloat,"0.1")) ≈ [1,big"0.1",big"-0.485",big"-0.1475",big"0.3379375"]
+        end
+    end
+
+    @testset "Int" begin
+        N = 10; A = 1:10; B = 2:11; C = range(3; step=2, length=N+1)
+        v_i = forwardrecurrence(N, A, B, C, 1)
+        v_f = forwardrecurrence(N, A, B, C, 0.1)
+        @test v_i isa Vector{Int}
+        @test v_f isa Vector{Float64}
+
+        j = 3 # a unit coefficient vector e_j selects the j-th polynomial, so clenshaw must match v_i[j]
+        @test clenshaw([zeros(Int,j-1); 1; zeros(Int,N-j)], A, B, C, 1) == v_i[j]
+    end
+end
\ No newline at end of file
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 9d04c704..0affa021 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -7,11 +7,11 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
     for T in (Float32, Float64)
         c = one(T) ./ (1:n)
         x = collect(-1 .+ 2*(0:n-1)/T(n))
-        f = zero(x)
-        FastTransforms.horner!(c, x, f)
+        f = similar(x)
+        @test FastTransforms.horner!(c, x, f) == f
         fd = T[sum(c[k]*x^(k-1) for k in 1:length(c)) for x in x]
         @test f ≈ fd
-        FastTransforms.clenshaw!(c, x, f)
+        @test FastTransforms.clenshaw!(c, x, f) == f
         fd = T[sum(c[k]*cos((k-1)*acos(x)) for k in 1:length(c)) for x in x]
         @test f ≈ fd
         A = T[(2k+one(T))/(k+one(T)) for k in 0:length(c)-1]
@@ -19,7 +19,7 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         C = T[k/(k+one(T)) for k in 0:length(c)]
         phi0 = ones(T, length(x))
         c = cheb2leg(c)
-        FastTransforms.clenshaw!(c, A, B, C, x, phi0, f)
+        @test FastTransforms.clenshaw!(c, A, B, C, x, phi0, f) == f
         @test f ≈ fd
     end
 
diff --git a/test/runtests.jl b/test/runtests.jl
index e5e341bd..33c88fd1 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,21 +1,13 @@
 using FastTransforms, LinearAlgebra, Test
 
 include("specialfunctionstests.jl")
-
 include("chebyshevtests.jl")
-
 include("quadraturetests.jl")
-
 include("libfasttransformstests.jl")
-
 include("nuffttests.jl")
-
 include("fftBigFloattests.jl")
-
 include("paduatests.jl")
-
 include("gaunttests.jl")
-
 include("hermitetests.jl")
-
 include("toeplitztests.jl")
+include("clenshawtests.jl")

From 9703ea6ac2a9c90df9b4be9f456f75fd16be89e0 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Thu, 16 Jul 2020 19:51:54 +0100
Subject: [PATCH 022/222] Support clenshaw! with any DenseColumnMajor blas
 vector (#113)

* Support clenshaw! with any DenseColumnMajor blas vector

* Fix out of bounds error

* reactivate coverage

* v0.9.3

* Allow strided coefficients

* fix c-call

* Test Zeros diagonal special case

* Update clenshawtests.jl

Co-authored-by: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
---
 .github/workflows/ci.yml |  7 +++---
 Project.toml             |  4 +++-
 README.md                |  2 +-
 src/FastTransforms.jl    |  6 +++--
 src/clenshaw.jl          | 18 +++++++++++----
 src/libfasttransforms.jl | 16 ++++++++-----
 test/clenshawtests.jl    | 49 ++++++++++++++++++++++++++++------------
 7 files changed, 70 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fa4a1153..b20d673f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -41,9 +41,10 @@ jobs:
             ${{ runner.os }}-
       - uses: julia-actions/julia-buildpkg@latest
       - uses: julia-actions/julia-runtest@latest
-      - uses: julia-actions/julia-uploadcodecov@latest
-        env:
-          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+      - uses: julia-actions/julia-processcoverage@v1
+      - uses: codecov/codecov-action@v1
+        with:
+          file: lcov.info
   docs:
     name: Documentation
     runs-on: ubuntu-latest
diff --git a/Project.toml b/Project.toml
index 932b1e66..757836fb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,9 +1,10 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.9.2"
+version = "0.9.3"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
+ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
 BinaryProvider = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
 DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
@@ -19,6 +20,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "0.4, 0.5"
+ArrayLayouts = "0.3.7"
 BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"
diff --git a/README.md b/README.md
index 9540c6a9..ca584b8f 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # FastTransforms.jl
 
-[![Build Status](https://github.com/JuliaApproximation/FastTransforms.jl/workflows/CI/badge.svg)](https://github.com/JuliaApproximation/FastTransforms.jl/actions?query=workflow%3ACI) [![Travis](https://travis-ci.org/JuliaApproximation/FastTransforms.jl.svg?branch=master)](https://travis-ci.org/JuliaApproximation/FastTransforms.jl) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-latest-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/latest)
+[![Build Status](https://github.com/JuliaApproximation/FastTransforms.jl/workflows/CI/badge.svg)](https://github.com/JuliaApproximation/FastTransforms.jl/actions?query=workflow%3ACI) [![Travis](https://travis-ci.org/JuliaApproximation/FastTransforms.jl.svg?branch=master)](https://travis-ci.org/JuliaApproximation/FastTransforms.jl) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/dev)
 
 `FastTransforms.jl` allows the user to conveniently work with orthogonal polynomials with degrees well into the millions.
 
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index bf4bde4d..452a1a1c 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,7 +1,7 @@
 module FastTransforms
 
 using FastGaussQuadrature, LinearAlgebra
-using Reexport, SpecialFunctions, ToeplitzMatrices, FillArrays
+using Reexport, SpecialFunctions, ToeplitzMatrices, FillArrays, ArrayLayouts
 
 import DSP
 
@@ -48,6 +48,8 @@ export plan_leg2cheb, plan_cheb2leg, plan_ultra2ultra, plan_jac2jac,
        plan_tet2cheb, plan_tet_synthesis, plan_tet_analysis,
        plan_spinsph2fourier, plan_spinsph_synthesis, plan_spinsph_analysis
 
+include("clenshaw.jl")
+
 include("libfasttransforms.jl")
 
 export plan_nufft, plan_nufft1, plan_nufft2, plan_nufft3, plan_inufft1, plan_inufft2
@@ -96,6 +98,6 @@ lgamma(x) = logabsgamma(x)[1]
 
 include("specialfunctions.jl")
 
-include("clenshaw.jl")
+
 
 end # module
diff --git a/src/clenshaw.jl b/src/clenshaw.jl
index 619c71ae..51461f1b 100644
--- a/src/clenshaw.jl
+++ b/src/clenshaw.jl
@@ -68,10 +68,14 @@ function clenshaw!(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::A
 end
 
 
-@inline _clenshaw_next(n, A, B, C, x, c, bn1, bn2) = muladd(muladd(A[n],x,B[n]), bn1, muladd(-C[n+1],bn2,c[n]))
-@inline _clenshaw_next(n, A, ::Zeros, C, x, c, bn1, bn2) = muladd(A[n]*x, bn1, muladd(-C[n+1],bn2,c[n]))
+Base.@propagate_inbounds _clenshaw_next(n, A, B, C, x, c, bn1, bn2) = muladd(muladd(A[n],x,B[n]), bn1, muladd(-C[n+1],bn2,c[n]))
+Base.@propagate_inbounds _clenshaw_next(n, A, ::Zeros, C, x, c, bn1, bn2) = muladd(A[n]*x, bn1, muladd(-C[n+1],bn2,c[n]))
 # Chebyshev U
-@inline _clenshaw_next(n, A::AbstractFill, ::Zeros, C::Ones, x, c, bn1, bn2) = muladd(getindex_value(A)*x, bn1, -bn2+c[n])
+Base.@propagate_inbounds _clenshaw_next(n, A::AbstractFill, ::Zeros, C::Ones, x, c, bn1, bn2) = muladd(getindex_value(A)*x, bn1, -bn2+c[n])
+
+# allow special casing first arg, for ChebyshevT in OrthogonalPolynomialsQuasi
+Base.@propagate_inbounds _clenshaw_first(A, B, C, x, c, bn1, bn2) = muladd(muladd(A[1],x,B[1]), bn1, muladd(-C[2],bn2,c[1]))
+
 
 """
     clenshaw(c, A, B, C, x)
@@ -90,9 +94,11 @@ function clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::Ab
     @inbounds begin
         bn2 = zero(T)
         bn1 = convert(T,c[N])
-        for n = N-1:-1:1
+        N == 1 && return bn1
+        for n = N-1:-1:2
             bn1,bn2 = _clenshaw_next(n, A, B, C, x, c, bn1, bn2),bn1
         end
+        bn1 = _clenshaw_first(A, B, C, x, c, bn1, bn2)
     end
     bn1
 end
@@ -120,7 +126,9 @@ clenshaw!(c::AbstractVector, x::AbstractVector) = clenshaw!(c, x, x)
 evaluates the first-kind Chebyshev (T) expansion with coefficients `c` at points `x`,
 overwriting `f` with the results.
 """
-function clenshaw!(c::AbstractVector, x::AbstractVector, f::AbstractVector)
+clenshaw!(c::AbstractVector, x::AbstractVector, f::AbstractVector) = _clenshaw!(MemoryLayout(c), MemoryLayout(x), MemoryLayout(f), c, x, f)
+
+function _clenshaw!(_, _, _, c::AbstractVector, x::AbstractVector, f::AbstractVector)
     f .= clenshaw.(Ref(c), x)
 end
 
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index a1efba39..32c9a959 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -73,15 +73,19 @@ function check_clenshaw_points(x, ϕ₀, f)
     length(x) == length(ϕ₀) == length(f) || throw(ArgumentError("Dimensions must match"))
 end
 
-function clenshaw!(c::Vector{Float64}, x::Vector{Float64}, f::Vector{Float64})
-    @assert length(x) == length(f)
-    ccall((:ft_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, 1, length(x), x, f)
+function check_clenshaw_points(x, f)
+    length(x) == length(f) || throw(ArgumentError("Dimensions must match"))
+end
+
+function _clenshaw!(::AbstractStridedLayout, ::AbstractColumnMajor, ::AbstractColumnMajor, c::AbstractVector{Float64}, x::AbstractVector{Float64}, f::AbstractVector{Float64})
+    @boundscheck check_clenshaw_points(x, f)
+    ccall((:ft_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, stride(c,1), length(x), x, f)
     f
 end
 
-function clenshaw!(c::Vector{Float32}, x::Vector{Float32}, f::Vector{Float32})
-    @assert length(x) == length(f)
-    ccall((:ft_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, 1, length(x), x, f)
+function _clenshaw!(::AbstractStridedLayout, ::AbstractColumnMajor, ::AbstractColumnMajor, c::AbstractVector{Float32}, x::AbstractVector{Float32}, f::AbstractVector{Float32})
+    @boundscheck check_clenshaw_points(x, f)
+    ccall((:ft_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, stride(c,1), length(x), x, f)
     f
 end
 
diff --git a/test/clenshawtests.jl b/test/clenshawtests.jl
index 8952ad39..aff2141f 100644
--- a/test/clenshawtests.jl
+++ b/test/clenshawtests.jl
@@ -3,20 +3,34 @@ import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrenc
 
 @testset "clenshaw" begin
     @testset "Chebyshev T" begin
-        c = [1,2,3]
-        cf = float(c)
-        @test @inferred(clenshaw(c,1)) ≡ 1 + 2 + 3
-        @test @inferred(clenshaw(c,0)) ≡ 1 + 0 - 3
-        @test @inferred(clenshaw(c,0.1)) == 1 + 2*0.1 + 3*cos(2acos(0.1))
-        @test @inferred(clenshaw(c,[-1,0,1])) == clenshaw!(c,[-1,0,1]) == [2,-2,6]
-        @test clenshaw(c,[-1,0,1]) isa Vector{Int}
-        @test @inferred(clenshaw(Float64[],1)) ≡ 0.0
+        for elty in (Float64, Float32)
+            c = [1,2,3]
+            cf = elty.(c)
+            @test @inferred(clenshaw(c,1)) ≡ 1 + 2 + 3
+            @test @inferred(clenshaw(c,0)) ≡ 1 + 0 - 3
+            @test @inferred(clenshaw(c,0.1)) == 1 + 2*0.1 + 3*cos(2acos(0.1))
+            @test @inferred(clenshaw(c,[-1,0,1])) == clenshaw!(c,[-1,0,1]) == [2,-2,6]
+            @test clenshaw(c,[-1,0,1]) isa Vector{Int}
+            @test @inferred(clenshaw(elty[],1)) ≡ zero(elty)
 
-        x = [1,0,0.1]
-        @test @inferred(clenshaw(c,x)) ≈ @inferred(clenshaw!(c,copy(x))) ≈ 
-            @inferred(clenshaw!(c,x,similar(x))) ≈
-            @inferred(clenshaw(cf,x)) ≈ @inferred(clenshaw!(cf,copy(x))) ≈ 
-            @inferred(clenshaw!(cf,x,similar(x))) ≈ [6,-2,-1.74]
+            x = elty[1,0,0.1]
+            @test @inferred(clenshaw(c,x)) ≈ @inferred(clenshaw!(c,copy(x))) ≈ 
+                @inferred(clenshaw!(c,x,similar(x))) ≈
+                @inferred(clenshaw(cf,x)) ≈ @inferred(clenshaw!(cf,copy(x))) ≈ 
+                @inferred(clenshaw!(cf,x,similar(x))) ≈ elty[6,-2,-1.74]
+
+            @testset "Strided" begin
+                cv = view(cf,:)
+                xv = view(x,:)
+                @test clenshaw!(cv, xv, similar(xv)) == clenshaw!(cf,x,similar(x))
+
+                cv2 = view(cf,1:2:3)
+                @test clenshaw!(cv2, xv, similar(xv)) == clenshaw([1,3], x)
+
+                # modifies x and xv
+                @test clenshaw!(cv2, xv) == xv == x == clenshaw([1,3], elty[1,0,0.1])
+            end
+        end
     end
 
     @testset "Chebyshev U" begin
@@ -101,6 +115,13 @@ import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrenc
         @test v_f isa Vector{Float64}
 
         j = 3
-        clenshaw([zeros(Int,j-1); 1; zeros(Int,N-j)], A, B, C, 1) == v_i[j]
+        @test clenshaw([zeros(Int,j-1); 1; zeros(Int,N-j)], A, B, C, 1) == v_i[j]
+    end
+
+    @testset "Zeros diagonal" begin
+        N = 10; A = randn(N); B = Zeros{Int}(N); C = randn(N+1)
+        @test forwardrecurrence(N, A, B, C, 0.1) == forwardrecurrence(N, A, Vector(B), C, 0.1)
+        c = randn(N)
+        @test clenshaw(c, A, B, C, 0.1) == clenshaw(c, A, Vector(B), C, 0.1)
     end
 end
\ No newline at end of file

From 6c1f969d6601a7a7d3409c6201c40b647966bcb7 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 29 Jul 2020 14:40:04 -0500
Subject: [PATCH 023/222] Feat libfasttransforms v0.3.3 (#115)

* support v0.3.3

add linux jobs on travis

* travis didn't bite

* had jobs and matrix

* allow arm64 to fail
---
 .travis.yml     | 15 +++++++++++++--
 Project.toml    |  4 ++--
 README.md       |  2 +-
 deps/build.jl   | 17 ++++++-----------
 src/clenshaw.jl |  9 ++++-----
 5 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index f60485ef..69afe595 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,15 +2,26 @@
 language: julia
 os:
   - freebsd
+  - linux
+arch:
+  - x64
+  - x86
+  - arm64
 julia:
   - 1.3
   - 1.4
   - nightly
-notifications:
-  email: false
 jobs:
+  exclude:
+    - os: freebsd
+      arch: x86
+    - os: freebsd
+      arch: arm64
   allow_failures:
     - julia: nightly
+    - arch: arm64
+notifications:
+  email: false
 cache:
   directories:
     - $HOME/.julia/artifacts
diff --git a/Project.toml b/Project.toml
index 757836fb..3d8285d9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.9.3"
+version = "0.9.4"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -25,7 +25,7 @@ BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.3.2"
+FastTransforms_jll = "0.3.3"
 FillArrays = "0.8"
 Reexport = "0.2"
 SpecialFunctions = "0.8, 0.9, 0.10"
diff --git a/README.md b/README.md
index ca584b8f..797a7abb 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ This package provides a Julia wrapper for the [C library](https://github.com/Mik
 
 ## Installation
 
-Installation, which uses [BinaryBuilder](https://github.com/JuliaPackaging/BinaryBuilder.jl) for Intel processors (Sandybridge and beyond), may be as straightforward as:
+Installation, which uses [BinaryBuilder](https://github.com/JuliaPackaging/BinaryBuilder.jl) for all of Julia's supported platforms (in particular Sandybridge Intel processors and beyond), may be as straightforward as:
 
 ```julia
 pkg> add FastTransforms
diff --git a/deps/build.jl b/deps/build.jl
index 5c6062be..e136faaf 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -1,11 +1,7 @@
 using BinaryProvider
 import Libdl
 
-version = v"0.3.2"
-
-if arch(platform_key_abi()) != :x86_64
-    @warn "FastTransforms has only been tested on x86_64 architectures."
-end
+version = v"0.3.3"
 
 const extension = Sys.isapple() ? "dylib" : Sys.islinux() ? "so" : Sys.iswindows() ? "dll" : ""
 
@@ -15,15 +11,14 @@ print_error() = error(
     print_platform_error(platform_key_abi())
 )
 
-print_platform_error(p::Platform) = "On $(BinaryProvider.platform_name(p)), please consider opening a pull request to add support.\n"
-print_platform_error(p::MacOS) = "On MacOS\n\tbrew install gcc@8 fftw mpfr\n"
-print_platform_error(p::Linux) = "On Linux\n\tsudo apt-get install gcc-8 libblas-dev libopenblas-base libfftw3-dev libmpfr-dev\n"
+print_platform_error(p::Platform) = "On $(BinaryProvider.platform_name(p)), please consider opening a pull request to add support to build from source.\n"
+print_platform_error(p::MacOS) = "On MacOS\n\tbrew install libomp fftw mpfr\n"
+print_platform_error(p::Linux) = "On Linux\n\tsudo apt-get install libomp-dev libblas-dev libopenblas-base libfftw3-dev libmpfr-dev\n"
 print_platform_error(p::Windows) = "On Windows\n\tvcpkg install openblas:x64-windows fftw3[core,threads]:x64-windows mpir:x64-windows mpfr:x64-windows\n"
 
 ft_build_from_source = get(ENV, "FT_BUILD_FROM_SOURCE", "false")
 if ft_build_from_source == "true"
     make = Sys.iswindows() ? "mingw32-make" : "make"
-    compiler = Sys.isapple() ? "CC=gcc-8" : "CC=gcc"
     flags = Sys.isapple() ? "FT_USE_APPLEBLAS=1" : Sys.iswindows() ? "FT_FFTW_WITH_COMBINED_THREADS=1" : ""
     script = """
         set -e
@@ -37,8 +32,8 @@ if ft_build_from_source == "true"
             git clone -b v$version https://github.com/MikaelSlevinsky/FastTransforms.git FastTransforms
         fi
         cd FastTransforms
-        $make assembly $compiler
-        $make lib $compiler $flags
+        $make assembly
+        $make lib $flags
         cd ..
         mv -f FastTransforms/libfasttransforms.$extension libfasttransforms.$extension
     """
diff --git a/src/clenshaw.jl b/src/clenshaw.jl
index 51461f1b..05e87e89 100644
--- a/src/clenshaw.jl
+++ b/src/clenshaw.jl
@@ -4,7 +4,7 @@
 evaluates the orthogonal polynomials at points `x`,
 where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
 as defined in DLMF,
-overwriting `v` with the results.   
+overwriting `v` with the results.
 """
 function forwardrecurrence!(v::AbstractVector{T}, A::AbstractVector, B::AbstractVector, C::AbstractVector, x) where T
     N = length(v)
@@ -51,7 +51,7 @@ where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coeffici
 as defined in DLMF,
 overwriting `x` with the results.
 """
-clenshaw!(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) = 
+clenshaw!(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) =
     clenshaw!(c, A, B, C, x, Ones{eltype(x)}(length(x)), x)
 
 
@@ -85,7 +85,7 @@ where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coeffici
 as defined in DLMF.
 `x` may also be a single `Number`.
 """
-     
+
 function clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number)
     N = length(c)
     T = promote_type(eltype(c),eltype(A),eltype(B),eltype(C),typeof(x))
@@ -104,7 +104,7 @@ function clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::Ab
 end
 
 
-clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) = 
+clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) =
     clenshaw!(c, A, B, C, copy(x))
 
 ###
@@ -157,4 +157,3 @@ function clenshaw(c::AbstractVector, x::Number)
 end
 
 clenshaw(c::AbstractVector, x::AbstractVector) = clenshaw!(c, copy(x))
-

From 2a2c5a3cb1cbf7a8d636207a09d20f2ac6bdc018 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 7 Aug 2020 15:56:35 +0100
Subject: [PATCH 024/222] CompatHelper: bump compat for "FillArrays" to "0.9"
 (#116)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 3d8285d9..20940050 100644
--- a/Project.toml
+++ b/Project.toml
@@ -26,7 +26,7 @@ DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.3.3"
-FillArrays = "0.8"
+FillArrays = "0.8, 0.9"
 Reexport = "0.2"
 SpecialFunctions = "0.8, 0.9, 0.10"
 ToeplitzMatrices = "0.6"

From 7fec3d0d8052708ab6f5d744639ef593c7bf044b Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 10 Aug 2020 22:42:22 +0100
Subject: [PATCH 025/222] CompatHelper: bump compat for "ArrayLayouts" to "0.4"
 (#117)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 20940050..079c7b14 100644
--- a/Project.toml
+++ b/Project.toml
@@ -20,7 +20,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "0.4, 0.5"
-ArrayLayouts = "0.3.7"
+ArrayLayouts = "0.3.7, 0.4"
 BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"

From dd2c57747f04fc67c0375651a539a9ba1912019c Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 11 Aug 2020 11:11:29 -0500
Subject: [PATCH 026/222] update CI for 1.5

---
 .github/workflows/CompatHelper.yml | 2 +-
 .github/workflows/ci.yml           | 4 ++--
 .travis.yml                        | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index 179e7a99..d486cb1b 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        julia-version: [1.2.0]
+        julia-version: [1.3.0]
         julia-arch: [x86]
         os: [ubuntu-latest]
     steps:
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b20d673f..880f7f81 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,7 +10,7 @@ jobs:
       matrix:
         version:
           - '1.3'
-          - '1.4'
+          - '1.5'
           - 'nightly'
         os:
           - ubuntu-latest
@@ -52,7 +52,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: julia-actions/setup-julia@v1
         with:
-          version: '1.3'
+          version: '1.5'
       - run: |
           julia --project=docs -e '
             using Pkg
diff --git a/.travis.yml b/.travis.yml
index 69afe595..83adbc61 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,7 +9,7 @@ arch:
   - arm64
 julia:
   - 1.3
-  - 1.4
+  - 1.5
   - nightly
 jobs:
   exclude:

From 49d7eec8de6ef6fc312fb9e658df0902d37a8ecf Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 17 Aug 2020 13:43:07 +0100
Subject: [PATCH 027/222] Make chebyshev transforms type stable, make points
 lazy (#114)

* Make chebyshev transforms type stable, make points lazy

* Update chebyshevtransform.jl

* Make type stable

* Chebyshev U

* Support ArrayLayouts v0.4

* Update clenshaw.jl

* Update clenshaw.jl

* Int64 -> Int

* Allow empty Chebyshev transform
---
 Project.toml              |   2 +-
 src/FastTransforms.jl     |   5 +-
 src/chebyshevtransform.jl | 412 ++++++++++++++++++++++++--------------
 src/clenshaw.jl           |   9 +-
 src/clenshawcurtis.jl     |   2 +-
 src/fejer.jl              |   2 +-
 test/chebyshevtests.jl    | 190 ++++++++++++------
 test/quadraturetests.jl   |   2 +-
 8 files changed, 403 insertions(+), 221 deletions(-)

diff --git a/Project.toml b/Project.toml
index 079c7b14..e014857d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.9.4"
+version = "0.10.0"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 452a1a1c..769c7d8a 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -9,7 +9,7 @@ import DSP
 @reexport using FFTW
 
 import Base: unsafe_convert, eltype, ndims, adjoint, transpose, show, *, \,
-             inv, size, view
+             inv, length, size, view, getindex
 
 import Base.GMP: Limb
 
@@ -66,7 +66,8 @@ include("PaduaTransform.jl")
 export plan_chebyshevtransform, plan_ichebyshevtransform, plan_chebyshevtransform!, plan_ichebyshevtransform!,
             chebyshevtransform, ichebyshevtransform, chebyshevpoints,
             plan_chebyshevutransform, plan_ichebyshevutransform, plan_chebyshevutransform!, plan_ichebyshevutransform!,
-            chebyshevutransform, ichebyshevutransform
+            chebyshevutransform, ichebyshevutransform,
+            chebyshevtransform!, ichebyshevtransform!, chebyshevutransform!, ichebyshevutransform!
 
 include("chebyshevtransform.jl")
 
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index a705ca49..904fe69f 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -1,161 +1,219 @@
 ## Transforms take values at Chebyshev points of the first and second kinds and produce Chebyshev coefficients
 
+abstract type ChebyshevPlan{T} <: Plan{T} end
 
-struct ChebyshevTransformPlan{T,kind,inplace,P} <: Plan{T}
-    plan::P
+size(P::ChebyshevPlan) = isdefined(P, :plan) ? size(P.plan) : (0,)
+length(P::ChebyshevPlan) = isdefined(P, :plan) ? length(P.plan) : 0
+
+
+# Check whether a ChebyshevPlan is applicable to a given input array, and
+# throw an informative error if not:
+function assert_applicable(p::ChebyshevPlan{T}, X::StridedArray{T}) where T
+    if size(X) != size(p)
+        throw(ArgumentError("Chebyshev plan applied to wrong-size array"))
+    end
 end
 
-ChebyshevTransformPlan{k,inp}(plan) where {k,inp} =
-    ChebyshevTransformPlan{eltype(plan),k,inp,typeof(plan)}(plan)
+struct ChebyshevTransformPlan{T,kind,inplace,P} <: ChebyshevPlan{T}
+    plan::FFTW.r2rFFTWPlan{T,P,true,1,UnitRange{Int}}
+    ChebyshevTransformPlan{T,kind,inplace,P}(plan) where {T,kind,inplace,P} = new{T,kind,inplace,P}(plan)
+    ChebyshevTransformPlan{T,kind,inplace,P}() where {T,kind,inplace,P} = new{T,kind,inplace,P}()
+end
 
+ChebyshevTransformPlan{T,kind,inplace}(plan::FFTW.r2rFFTWPlan{T,P}) where {T,kind,inplace,P} = 
+    ChebyshevTransformPlan{T,kind,inplace,P}(plan)
 
+ChebyshevTransformPlan{T,kind,inplace}(plan::ChebyshevTransformPlan{T,kind,inp,P}) where {T,kind,inplace,inp,P} = 
+    ChebyshevTransformPlan{T,kind,inplace,P}(plan.plan)
 
-function plan_chebyshevtransform!(x::AbstractVector{T}; kind::Integer=1) where T<:fftwNumber
-    if kind == 1
-        plan = isempty(x) ? fill(one(T),1,length(x)) : FFTW.plan_r2r!(x, FFTW.REDFT10)
-        ChebyshevTransformPlan{1,true}(plan)
-    elseif kind == 2
-        plan = length(x) ≤ 1 ? fill(one(T),1,length(x)) : FFTW.plan_r2r!(x, FFTW.REDFT00)
-        ChebyshevTransformPlan{2,true}(plan)
+function plan_chebyshevtransform!(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+    if isempty(x)
+        ChebyshevTransformPlan{T,1,true,(5,)}()
+    else
+        ChebyshevTransformPlan{T,1,true,(5,)}(FFTW.plan_r2r!(x, FFTW.REDFT10))
     end
 end
+function plan_chebyshevtransform!(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
+    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
+    ChebyshevTransformPlan{T,2,true,(3,)}(FFTW.plan_r2r!(x, FFTW.REDFT00))
+end
+
 
-function plan_chebyshevtransform(x::AbstractVector{T};kind::Integer=1) where T<:fftwNumber
-    plan = plan_chebyshevtransform!(x;kind=kind)
-    ChebyshevTransformPlan{kind,false}(plan)
+function plan_chebyshevtransform(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+    if isempty(x)
+        ChebyshevTransformPlan{T,1,false,(5,)}()
+    else
+        ChebyshevTransformPlan{T,1,false,(5,)}(FFTW.plan_r2r!(x, FFTW.REDFT10))
+    end
 end
+function plan_chebyshevtransform(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
+    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
+    ChebyshevTransformPlan{T,2,false,(3,)}(FFTW.plan_r2r!(x, FFTW.REDFT00))
+end
+
+plan_chebyshevtransform!(x::AbstractVector) = plan_chebyshevtransform!(x, Val(1))
+plan_chebyshevtransform(x::AbstractVector) = plan_chebyshevtransform(x, Val(1))
+
 
-function *(P::ChebyshevTransformPlan{T,1,true},x::AbstractVector{T}) where T
+function *(P::ChebyshevTransformPlan{T,1,true}, x::AbstractVector{T}) where T
     n = length(x)
-    n ≤ 1 && return x
+    assert_applicable(P, x)
+    n == 0 && return x
 
-    x = P.plan*x
-    x[1] /= 2
-    lmul!(inv(convert(T,n)), x)
+    y = P.plan*x # will be  === x if in-place
+    y[1] /= 2
+    lmul!(inv(convert(T,n)), y)
 end
 
 function *(P::ChebyshevTransformPlan{T,2,true}, x::AbstractVector{T}) where T
     n = length(x)
-    n ≤ 1 && return x
-
-    x = P.plan*x
-    x[1] /= 2; x[end] /= 2
-    lmul!(inv(convert(T,n-1)),x)
+    y = P.plan*x # will be  === x if in-place
+    y[1] /= 2; y[end] /= 2
+    lmul!(inv(convert(T,n-1)),y)
 end
 
-chebyshevtransform!(x::AbstractVector{T};kind::Integer=1) where {T<:fftwNumber} =
-    plan_chebyshevtransform!(x;kind=kind)*x
+*(P::ChebyshevTransformPlan{T,k,false}, x::AbstractVector{T}) where {T,k} = 
+    ChebyshevTransformPlan{T,k,true}(P)*copy(x)
 
-chebyshevtransform(x;kind::Integer=1) = chebyshevtransform!(copy(x);kind=kind)
+chebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where T<:fftwNumber =
+    plan_chebyshevtransform!(x, kind)*x
 
-*(P::ChebyshevTransformPlan{T,k,false}, x::AbstractVector{T}) where {T,k} = P.plan*copy(x)
 
+"""
+    chebyshevtransform(x, kind=Val(1))
 
+transforms from values on a Chebyshev grid of the first or second kind to Chebyshev
+coefficients.
+"""
+chebyshevtransform(x, kind=Val(1)) = chebyshevtransform!(copy(x), kind)
 
 
 ## Inverse transforms take Chebyshev coefficients and produce values at Chebyshev points of the first and second kinds
 
 
-struct IChebyshevTransformPlan{T,kind,inplace,P}
-    plan::P
+struct IChebyshevTransformPlan{T,kind,inplace,P} <: ChebyshevPlan{T}
+    plan::FFTW.r2rFFTWPlan{T,P,true,1,UnitRange{Int}}
+    IChebyshevTransformPlan{T,kind,inplace,P}(plan) where {T,kind,inplace,P} = new{T,kind,inplace,P}(plan)
+    IChebyshevTransformPlan{T,kind,inplace,P}() where {T,kind,inplace,P} = new{T,kind,inplace,P}()
 end
 
+IChebyshevTransformPlan{T,kind,inplace}(F::FFTW.r2rFFTWPlan{T,P}) where {T,kind,inplace,P} = 
+    IChebyshevTransformPlan{T,kind,inplace,P}(F)
+
+IChebyshevTransformPlan{T,kind,true}(F::IChebyshevTransformPlan{T,kind,false,P}) where {T,kind,P} = 
+    IChebyshevTransformPlan{T,kind,true,P}(F.plan)
+
+size(P::IChebyshevTransformPlan) = isdefined(P, :plan) ? size(P.plan) : (0,)
+length(P::IChebyshevTransformPlan) = isdefined(P, :plan) ? length(P.plan) : 0
+
+
 # second kind Chebyshev transforms share a plan with their inverse
 # so we support this via inv
-inv(P::ChebyshevTransformPlan{T,2,true}) where T = IChebyshevTransformPlan{T,2,true,typeof(P)}(P)
-inv(P::IChebyshevTransformPlan{T,2,true}) where T = P.plan
+inv(P::ChebyshevTransformPlan{T,2,inp}) where {T,inp} = IChebyshevTransformPlan{T,2,inp}(P.plan)
+inv(P::IChebyshevTransformPlan{T,2,inp}) where {T,inp} = ChebyshevTransformPlan{T,2,inp}(P.plan)
+
 
 \(P::ChebyshevTransformPlan, x::AbstractArray) = inv(P) * x
 \(P::IChebyshevTransformPlan, x::AbstractArray) = inv(P) * x
 
 
-function plan_ichebyshevtransform!(x::AbstractVector{T};kind::Integer=1) where T<:fftwNumber
-    if kind == 1
-        plan = isempty(x) ? fill(one(T),1,length(x)) : FFTW.plan_r2r!(x, FFTW.REDFT01)
-        IChebyshevTransformPlan{T,1,true,typeof(plan)}(plan)
-    elseif kind == 2
-        inv(plan_chebyshevtransform!(x;kind=2))
+function plan_ichebyshevtransform!(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+    if isempty(x)
+        IChebyshevTransformPlan{T,1,true,(4,)}()
+    else
+        IChebyshevTransformPlan{T,1,true,(4,)}(FFTW.plan_r2r!(x, FFTW.REDFT01))
     end
 end
 
-function plan_ichebyshevtransform(x::AbstractVector{T};kind::Integer=1) where T<:fftwNumber
-    plan = plan_ichebyshevtransform!(similar(Vector{T},axes(x));kind=kind)
-    IChebyshevTransformPlan{T,kind,false,typeof(plan)}(plan)
+function plan_ichebyshevtransform!(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
+    inv(plan_chebyshevtransform!(x, Val(2)))
 end
 
-function *(P::IChebyshevTransformPlan{T,1,true},x::AbstractVector{T}) where T<:fftwNumber
-    isempty(x) && return x
-    x[1] *=2
-    x = lmul!(convert(T,0.5), P.plan*x)
-    x
+function plan_ichebyshevtransform(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+    if isempty(x)
+        IChebyshevTransformPlan{T,1,false,(4,)}()
+    else
+        IChebyshevTransformPlan{T,1,false,(4,)}(FFTW.plan_r2r!(x, FFTW.REDFT01))
+    end
 end
 
-function *(P::IChebyshevTransformPlan{T,2,true},x::AbstractVector{T}) where T<:fftwNumber
-    n = length(x)
-    n ≤ 1 && return x
-    x[1] *= 2; x[end] *= 2
-    x = P.plan*x
-    x[1] *= 2; x[end] *= 2
-    lmul!(convert(T,0.5(n-1)),x)
+function plan_ichebyshevtransform(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
+    inv(plan_chebyshevtransform(x, Val(2)))
 end
 
-ichebyshevtransform!(x::AbstractVector{T};kind::Integer=1) where {T<:fftwNumber} =
-    plan_ichebyshevtransform!(x;kind=kind)*x
+plan_ichebyshevtransform!(x::AbstractVector) = plan_ichebyshevtransform!(x, Val(1))
+plan_ichebyshevtransform(x::AbstractVector) = plan_ichebyshevtransform(x, Val(1))
 
-ichebyshevtransform(x;kind::Integer=1) = ichebyshevtransform!(copy(x); kind=kind)
 
-*(P::IChebyshevTransformPlan{T,k,false},x::AbstractVector{T}) where {T,k} = P.plan*copy(x)
+function *(P::IChebyshevTransformPlan{T,1,true}, x::AbstractVector{T}) where T<:fftwNumber
+    n = length(x)
+    assert_applicable(P, x)
+    n == 0 && return x
 
-## Code generation for integer inputs
+    x[1] *= 2
+    x = lmul!(convert(T,0.5), P.plan*x)
+    x
+end 
+function *(P::IChebyshevTransformPlan{T,2, true}, x::AbstractVector{T}) where T<:fftwNumber
+    n = length(x)
+    assert_applicable(P, x)
 
-for func in (:chebyshevtransform,:ichebyshevtransform)
-    @eval $func(x::AbstractVector{T};kind::Integer=1) where {T<:Integer} = $func(convert(Float64,x);kind=kind)
+    x[1] *= 2; x[end] *= 2
+    x = ChebyshevTransformPlan{T,2,true}(P.plan)*x
+    x[1] *= 2; x[end] *= 2
+    lmul!(convert(T,0.5(n-1)),x)
 end
 
+*(P::IChebyshevTransformPlan{T,k,false},x::AbstractVector{T}) where {T,k} = 
+    IChebyshevTransformPlan{T,k,true}(P)*copy(x)
+
+ichebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where {T<:fftwNumber} =
+    plan_ichebyshevtransform!(x, kind)*x
+
+ichebyshevtransform(x, kind=Val(1)) = ichebyshevtransform!(copy(x), kind)
 
 # Matrix inputs
 #
 #
-function chebyshevtransform!(X::AbstractMatrix{T}; kind::Integer=1) where T<:fftwNumber
-    if kind == 1
-        if size(X) == (1,1)
-            X
-        else
-            X=FFTW.r2r!(X,FFTW.REDFT10)
-            X[:,1]/=2;X[1,:]/=2;
-            lmul!(1/(size(X,1)*size(X,2)),X)
-        end
-    elseif kind == 2
-        if size(X) == (1,1)
-            X
-        else
-            X=FFTW.r2r!(X,FFTW.REDFT00)
-            lmul!(1/((size(X,1)-1)*(size(X,2)-1)),X)
-            X[:,1]/=2;X[:,end]/=2
-            X[1,:]/=2;X[end,:]/=2
-            X
-        end
+function chebyshevtransform!(X::AbstractMatrix{T}, ::Val{1}) where T<:fftwNumber
+    if size(X) == (1,1)
+        X
+    else
+        X=FFTW.r2r!(X,FFTW.REDFT10)
+        X[:,1]/=2;X[1,:]/=2;
+        lmul!(1/(size(X,1)*size(X,2)),X)
+    end
+end
+
+function chebyshevtransform!(X::AbstractMatrix{T}, ::Val{2}) where T<:fftwNumber
+    if size(X,1) < 2 || size(X,2) < 2
+        throw(ArgumentError("Chebyshev plan applied to wrong-size array"))
+    else
+        X=FFTW.r2r!(X,FFTW.REDFT00)
+        lmul!(1/((size(X,1)-1)*(size(X,2)-1)),X)
+        X[:,1]/=2;X[:,end]/=2
+        X[1,:]/=2;X[end,:]/=2
+        X
     end
 end
 #
-function ichebyshevtransform!(X::AbstractMatrix{T}; kind::Integer=1) where T<:fftwNumber
-    if kind == 1
-        if size(X) == (1,1)
-            X
-        else
-            X[1,:]*=2;X[:,1]*=2
-            X = FFTW.r2r(X,FFTW.REDFT01)
-            lmul!(0.25, X)
-        end
-    elseif kind == 2
-        if size(X) == (1,1)
-            X
-        else
-            X[1,:]*=2;X[end,:]*=2;X[:,1]*=2;X[:,end]*=2
-            X=chebyshevtransform!(X;kind=kind)
-            X[1,:]*=2;X[end,:]*=2;X[:,1]*=2;X[:,end]*=2
-            lmul!((size(X,1)-1)*(size(X,2)-1)/4,X)
-        end
+function ichebyshevtransform!(X::AbstractMatrix{T}, ::Val{1}) where T<:fftwNumber
+    if size(X) == (1,1)
+        X # a 1×1 input is returned unchanged
+    else
+        X[1,:]*=2;X[:,1]*=2 # undo the halving of the first row/column done by chebyshevtransform!
+        X = FFTW.r2r!(X,FFTW.REDFT01) # in-place, so the caller's X receives the result (matches chebyshevtransform!)
+        lmul!(0.25, X)
+    end
+end
+function ichebyshevtransform!(X::AbstractMatrix{T}, ::Val{2}) where T<:fftwNumber
+    if size(X,1) < 2 || size(X,2) < 2
+        throw(ArgumentError("Chebyshev plan applied to wrong-size array"))
+    else
+        X[1,:]*=2;X[end,:]*=2;X[:,1]*=2;X[:,end]*=2
+        X=chebyshevtransform!(X, Val(2))
+        X[1,:]*=2;X[end,:]*=2;X[:,1]*=2;X[:,end]*=2
+        lmul!((size(X,1)-1)*(size(X,2)-1)/4,X)
     end
 end
 #
@@ -163,32 +221,51 @@ end
 
 ## Chebyshev U
 
-struct ChebyshevUTransformPlan{T,kind,inplace,P} <: Plan{T}
-    plan::P
+struct ChebyshevUTransformPlan{T,kind,inplace,P} <: ChebyshevPlan{T}
+    plan::FFTW.r2rFFTWPlan{T,P,true,1,UnitRange{Int}}
+    ChebyshevUTransformPlan{T,kind,inplace,P}(plan) where {T,kind,inplace,P} = new{T,kind,inplace,P}(plan)
+    ChebyshevUTransformPlan{T,kind,inplace,P}() where {T,kind,inplace,P} = new{T,kind,inplace,P}()
 end
 
-ChebyshevUTransformPlan{k,inp}(plan) where {k,inp} =
-    ChebyshevUTransformPlan{eltype(plan),k,inp,typeof(plan)}(plan)
+ChebyshevUTransformPlan{T,kind,inplace}(plan::FFTW.r2rFFTWPlan{T,P}) where {T,kind,inplace,P} = 
+    ChebyshevUTransformPlan{T,kind,inplace,P}(plan)
+
+ChebyshevUTransformPlan{T,kind,inplace}(plan::ChebyshevUTransformPlan{T,kind,inp,P}) where {T,kind,inplace,inp,P} = 
+    ChebyshevUTransformPlan{T,kind,inplace,P}(plan.plan)
 
 
 
-function plan_chebyshevutransform!(x::AbstractVector{T}; kind::Integer=1) where T<:fftwNumber
-    if kind == 1
-        plan = isempty(x) ? fill(one(T),1,length(x)) : FFTW.plan_r2r!(x, FFTW.RODFT10)
-        ChebyshevUTransformPlan{1,true}(plan)
-    elseif kind == 2
-        plan = length(x) ≤ 1 ? fill(one(T),1,length(x)) : FFTW.plan_r2r!(x, FFTW.RODFT00)
-        ChebyshevUTransformPlan{2,true}(plan)
+function plan_chebyshevutransform!(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+    if isempty(x)
+        ChebyshevUTransformPlan{T,1,true,(9,)}()
+    else
+        ChebyshevUTransformPlan{T,1,true,(9,)}(FFTW.plan_r2r!(x, FFTW.RODFT10))
     end
 end
+function plan_chebyshevutransform!(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
+    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
+    ChebyshevUTransformPlan{T,2,true,(7,)}(FFTW.plan_r2r!(x, FFTW.RODFT00))
+end
 
-function plan_chebyshevutransform(x::AbstractVector{T};kind::Integer=1) where T<:fftwNumber
-    plan = plan_chebyshevutransform!(x;kind=kind)
-    ChebyshevUTransformPlan{kind,false}(plan)
+function plan_chebyshevutransform(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+    if isempty(x)
+        ChebyshevUTransformPlan{T,1,false,(9,)}()
+    else
+        ChebyshevUTransformPlan{T,1,false,(9,)}(FFTW.plan_r2r!(x, FFTW.RODFT10))
+    end
+end
+function plan_chebyshevutransform(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
+    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
+    ChebyshevUTransformPlan{T,2,false,(7,)}(FFTW.plan_r2r!(x, FFTW.RODFT00))
 end
 
+plan_chebyshevutransform!(x::AbstractVector) = plan_chebyshevutransform!(x, Val(1))
+plan_chebyshevutransform(x::AbstractVector) = plan_chebyshevutransform(x, Val(1))
+
+
 function *(P::ChebyshevUTransformPlan{T,1,true},x::AbstractVector{T}) where T
     n = length(x)
+    assert_applicable(P, x)
     n ≤ 1 && return x
 
     for k=1:n # sqrt(1-x_j^2) weight
@@ -197,8 +274,9 @@ function *(P::ChebyshevUTransformPlan{T,1,true},x::AbstractVector{T}) where T
     P.plan * x
 end
 
-function *(P::ChebyshevUTransformPlan{T,2,true},x::AbstractVector{T}) where T
+function *(P::ChebyshevUTransformPlan{T,2,true}, x::AbstractVector{T}) where T
     n = length(x)
+    assert_applicable(P, x)
     n ≤ 1 && return x
 
     c = one(T)/ (n+1)
@@ -208,38 +286,66 @@ function *(P::ChebyshevUTransformPlan{T,2,true},x::AbstractVector{T}) where T
     lmul!(c, P.plan * x)
 end
 
-chebyshevutransform!(x::AbstractVector{T};kind::Integer=1) where {T<:fftwNumber} =
-    plan_chebyshevutransform!(x;kind=kind)*x
+chebyshevutransform!(x::AbstractVector{T}, kind=Val(1)) where {T<:fftwNumber} =
+    plan_chebyshevutransform!(x, kind)*x
 
-chebyshevutransform(x;kind::Integer=1) = chebyshevutransform!(copy(x);kind=kind)
 
-*(P::ChebyshevUTransformPlan{T,k,false},x::AbstractVector{T}) where {T,k} = P.plan*copy(x)
+"""
+    chebyshevutransform(x, ::Val{kind}=Val(1))
+
+Transform values on a Chebyshev grid of the first or second kind (selected by `kind`)
+to coefficients in a Chebyshev U (second-kind polynomial) expansion.
+"""
+chebyshevutransform(x, kind=Val(1)) = chebyshevutransform!(copy(x), kind)
+
+*(P::ChebyshevUTransformPlan{T,k,false}, x::AbstractVector{T}) where {T,k} = ChebyshevUTransformPlan{T,k,true}(P)*copy(x)
 
 ## Inverse transforms take ChebyshevU coefficients and produce values at ChebyshevU points of the first and second kinds
 
 
-struct IChebyshevUTransformPlan{T,kind,inplace,P}
-    plan::P
+struct IChebyshevUTransformPlan{T,kind,inplace,P} <: ChebyshevPlan{T}
+    plan::FFTW.r2rFFTWPlan{T,P,true,1,UnitRange{Int}}
+    IChebyshevUTransformPlan{T,kind,inplace,P}(plan) where {T,kind,inplace,P} = new{T,kind,inplace,P}(plan)
+    IChebyshevUTransformPlan{T,kind,inplace,P}() where {T,kind,inplace,P} = new{T,kind,inplace,P}()
 end
 
+IChebyshevUTransformPlan{T,kind,inplace}(F::FFTW.r2rFFTWPlan{T,P}) where {T,kind,inplace,P} = 
+    IChebyshevUTransformPlan{T,kind,inplace,P}(F)
+
+IChebyshevUTransformPlan{T,kind,true}(F::IChebyshevUTransformPlan{T,kind,false,P}) where {T,kind,P} = 
+    IChebyshevUTransformPlan{T,kind,true,P}(F.plan)
 
-function plan_ichebyshevutransform!(x::AbstractVector{T};kind::Integer=1) where T<:fftwNumber
-    if kind == 1
-        plan = isempty(x) ? fill(one(T),1,length(x)) : FFTW.plan_r2r!(x, FFTW.RODFT01)
-        IChebyshevUTransformPlan{T,1,true,typeof(plan)}(plan)
-    elseif kind == 2
-        plan = length(x) ≤ 1 ? fill(one(T),1,length(x)) : FFTW.plan_r2r!(x, FFTW.RODFT00)
-        IChebyshevUTransformPlan{T,2,true,typeof(plan)}(plan)
+function plan_ichebyshevutransform!(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+    if isempty(x)
+        IChebyshevUTransformPlan{T,1,true,(8,)}()
+    else
+        IChebyshevUTransformPlan{T,1,true,(8,)}(FFTW.plan_r2r!(x, FFTW.RODFT01))
     end
 end
+function plan_ichebyshevutransform!(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
+    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
+    IChebyshevUTransformPlan{T,2,true,(7,)}(FFTW.plan_r2r!(x, FFTW.RODFT00))
+end
 
-function plan_ichebyshevutransform(x::AbstractVector{T}; kind::Integer=1) where T<:fftwNumber
-    plan = plan_ichebyshevutransform!(similar(Vector{T},axes(x)); kind=kind)
-    IChebyshevUTransformPlan{T,kind,false,typeof(plan)}(plan)
+function plan_ichebyshevutransform(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+    if isempty(x)
+        IChebyshevUTransformPlan{T,1,false,(8,)}()
+    else
+        IChebyshevUTransformPlan{T,1,false,(8,)}(FFTW.plan_r2r!(x, FFTW.RODFT01))
+    end
 end
+function plan_ichebyshevutransform(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
+    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
+    IChebyshevUTransformPlan{T,2,false,(7,)}(FFTW.plan_r2r!(x, FFTW.RODFT00))
+end
+
+plan_ichebyshevutransform!(x::AbstractVector) = plan_ichebyshevutransform!(x, Val(1))
+plan_ichebyshevutransform(x::AbstractVector) = plan_ichebyshevutransform(x, Val(1))
+
 
 function *(P::IChebyshevUTransformPlan{T,1,true}, x::AbstractVector{T}) where T<:fftwNumber
     n = length(x)
+    assert_applicable(P, x)
     n ≤ 1 && return x
 
     x = P.plan * x
@@ -253,6 +359,7 @@ end
 
 function *(P::IChebyshevUTransformPlan{T,2,true}, x::AbstractVector{T}) where T<:fftwNumber
     n = length(x)
+    assert_applicable(P, x)
     n ≤ 1 && return x
 
     c = one(T)/ (n+1)
@@ -264,38 +371,49 @@ function *(P::IChebyshevUTransformPlan{T,2,true}, x::AbstractVector{T}) where T<
     x
 end
 
-ichebyshevutransform!(x::AbstractVector{T};kind::Integer=1) where {T<:fftwNumber} =
-    plan_ichebyshevutransform!(x;kind=kind)*x
+ichebyshevutransform!(x::AbstractVector{T}, kind=Val(1)) where {T<:fftwNumber} =
+    plan_ichebyshevutransform!(x, kind)*x
+
+ichebyshevutransform(x, kind=Val(1)) = ichebyshevutransform!(copy(x), kind)
 
-ichebyshevutransform(x;kind::Integer=1) = ichebyshevutransform!(copy(x);kind=kind)
+*(P::IChebyshevUTransformPlan{T,k,false},x::AbstractVector{T}) where {T,k} = 
+    IChebyshevUTransformPlan{T,k,true}(P)*copy(x)
 
-*(P::IChebyshevUTransformPlan{T,k,false},x::AbstractVector{T}) where {T,k} = P.plan*copy(x)
 
 ## Code generation for integer inputs
 
-for func in (:chebyshevutransform,:ichebyshevutransform)
-    @eval $func(x::AbstractVector{T};kind::Integer=1) where {T<:Integer} = $func(convert(Float64,x);kind=kind)
+for func in (:chebyshevtransform,:ichebyshevtransform,:chebyshevutransform,:ichebyshevutransform)
+    @eval $func(x::AbstractVector{T}, kind=Val(1)) where {T<:Integer} = $func(convert(AbstractVector{Float64},x), kind)
 end
 
 
 
-
 ## points
 
-function chebyshevpoints(::Type{T}, n::Integer; kind::Int=1) where T<:Number
-    if kind == 1
-        T[sinpi((n-2k-one(T))/2n) for k=0:n-1]
-    elseif kind == 2
-        if n == 1
-            zeros(T,1)
-        else
-	    T[sinpi((n-2k-one(T))/(2n-2)) for k=0:n-1]
-        end
-    else
-        throw(ArgumentError("kind $kind not a valid kind of Chebyshev points"))
+struct ChebyshevGrid{kind,T} <: AbstractVector{T}
+    n::Int # number of grid points
+    function ChebyshevGrid{1,T}(n::Int) where T
+        n ≥ 0 || throw(ArgumentError("Number of points must be nonnegative"))
+        new{1,T}(n)
+    end
+    function ChebyshevGrid{2,T}(n::Int) where T
+        n ≥ 2 || throw(ArgumentError("Number of points must be at least 2"))
+        new{2,T}(n)
     end
 end
-chebyshevpoints(n::Integer; kind::Int=1) = chebyshevpoints(Float64, n; kind=kind)
+
+ChebyshevGrid{kind}(n::Integer) where kind = ChebyshevGrid{kind,Float64}(n)
+
+size(g::ChebyshevGrid) = (g.n,)
+getindex(g::ChebyshevGrid{1,T}, k::Integer) where T =
+    sinpi(convert(T,g.n-2k+1)/(2g.n))
+
+getindex(g::ChebyshevGrid{2,T}, k::Integer) where T =
+    sinpi(convert(T,g.n-2k+1)/(2g.n-2))
+
+chebyshevpoints(::Type{T}, n::Integer, ::Val{kind}) where {T<:Number,kind} = ChebyshevGrid{kind,T}(n)
+chebyshevpoints(::Type{T}, n::Integer) where T = chebyshevpoints(T, n, Val(1))
+chebyshevpoints(n::Integer, kind=Val(1)) = chebyshevpoints(Float64, n, kind)
 
 
 # sin(nθ) coefficients to values at Clenshaw-Curtis nodes except ±1
diff --git a/src/clenshaw.jl b/src/clenshaw.jl
index 05e87e89..297df736 100644
--- a/src/clenshaw.jl
+++ b/src/clenshaw.jl
@@ -6,13 +6,12 @@ where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coeffici
 as defined in DLMF,
 overwriting `v` with the results.
 """
-function forwardrecurrence!(v::AbstractVector{T}, A::AbstractVector, B::AbstractVector, C::AbstractVector, x) where T
+function forwardrecurrence!(v::AbstractVector{T}, A::AbstractVector, B::AbstractVector, C::AbstractVector, x, p0=one(T)) where T
     N = length(v)
     N == 0 && return v
     length(A)+1 ≥ N && length(B)+1 ≥ N && length(C)+1 ≥ N || throw(ArgumentError("A, B, C must contain at least $(N-1) entries"))
-    p0 = one(T) # assume OPs are normalized to one for no
-    p1 = convert(T, N == 1 ? p0 : A[1]x + B[1]) # avoid accessing A[1]/B[1] if empty
-    _forwardrecurrence!(v, A, B, C, x, p0, p1)
+    p1 = convert(T, N == 1 ? p0 : muladd(A[1],x,B[1])*p0) # avoid accessing A[1]/B[1] if empty
+    _forwardrecurrence!(v, A, B, C, x, convert(T, p0), p1)
 end
 
 
@@ -60,7 +59,7 @@ clenshaw!(c, A, B, C, x, ϕ₀, f)
 
 evaluates the orthogonal polynomial expansion with coefficients `c` at points `x`,
 where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
-as defined in DLMF and ϕ₀ is the zeroth coefficient,
+as defined in DLMF and ϕ₀ is the zeroth polynomial,
 overwriting `f` with the results.
 """
 function clenshaw!(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector, ϕ₀::AbstractVector, f::AbstractVector)
diff --git a/src/clenshawcurtis.jl b/src/clenshawcurtis.jl
index 05f4d85a..535f9139 100644
--- a/src/clenshawcurtis.jl
+++ b/src/clenshawcurtis.jl
@@ -3,7 +3,7 @@ plan_clenshawcurtis(μ) = length(μ) > 1 ? FFTW.plan_r2r!(μ, FFTW.REDFT00) : fi
 """
 Compute nodes of the Clenshaw—Curtis quadrature rule.
 """
-clenshawcurtisnodes(::Type{T}, N::Int) where T = chebyshevpoints(T, N; kind = 2)
+clenshawcurtisnodes(::Type{T}, N::Int) where T = chebyshevpoints(T, N, Val(2))
 
 """
 Compute weights of the Clenshaw—Curtis quadrature rule with modified Chebyshev moments of the first kind ``\\mu``.
diff --git a/src/fejer.jl b/src/fejer.jl
index e51ef548..096b5592 100644
--- a/src/fejer.jl
+++ b/src/fejer.jl
@@ -3,7 +3,7 @@ plan_fejer1(μ) = FFTW.plan_r2r!(μ, FFTW.REDFT01)
 """
 Compute nodes of Fejer's first quadrature rule.
 """
-fejernodes1(::Type{T}, N::Int) where T = chebyshevpoints(T, N; kind = 1)
+fejernodes1(::Type{T}, N::Int) where T = chebyshevpoints(T, N, Val(1))
 
 """
 Compute weights of Fejer's first quadrature rule with modified Chebyshev moments of the first kind ``\\mu``.
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 8855a405..74ac1ada 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -2,153 +2,217 @@ using FastTransforms, Test
 
 @testset "Chebyshev transform"  begin
     @testset "Chebyshev points" begin
-        @test chebyshevpoints(10; kind=1) == chebyshevpoints(Float64, 10; kind=1)
-        @test chebyshevpoints(10; kind=2) == chebyshevpoints(Float64, 10; kind=2)
+        @test @inferred(chebyshevpoints(10)) == @inferred(chebyshevpoints(Float64, 10))
+        @test @inferred(chebyshevpoints(10, Val(2))) == @inferred(chebyshevpoints(Float64, 10, Val(2)))
         for T in (Float32, Float64, ComplexF32, ComplexF64)
-            @test chebyshevpoints(T, 0, kind=1) == chebyshevpoints(T, 0, kind=2) == T[]
-            @test chebyshevpoints(T, 1, kind=1) == chebyshevpoints(T, 1, kind=2) == T[0]
+            @test chebyshevpoints(T, 0) == T[]
+            @test chebyshevpoints(T, 1) == T[0]
 
             n = 20
-            @inferred(chebyshevpoints(T, n, kind=1))
-            @inferred(chebyshevpoints(T, n, kind=2))
-            @test_throws ArgumentError chebyshevpoints(n, kind=-1)
+            @test @inferred(chebyshevpoints(T, n)) == [sinpi(convert(T,n-2k+1)/(2n)) for k=1:n]
+            @test @inferred(chebyshevpoints(T, n, Val(2))) == [sinpi(convert(T,n-2k+1)/(2n-2)) for k=1:n]
+
+            @test_throws MethodError chebyshevpoints(n, Val(-1))
+            @test_throws ArgumentError chebyshevpoints(T, 0, Val(2))
+            @test_throws ArgumentError chebyshevpoints(T, 1, Val(2))
         end
     end
 
     @testset "Chebyshev first kind points <-> first kind coefficients" begin
         for T in (Float32, Float64, ComplexF32, ComplexF64)
             n = 20
-            p_1 = chebyshevpoints(T, n, kind=1)
+            p_1 = chebyshevpoints(T, n)
             f = exp.(p_1)
-            f̌ = chebyshevtransform(f; kind=1)
+            f̌ = @inferred(chebyshevtransform(f))
+            @test f̌ == chebyshevtransform!(copy(f))
 
             f̃ = x -> [cos(k*acos(x)) for k=0:n-1]' * f̌
             @test f̃(0.1) ≈ exp(T(0.1))
-            @test ichebyshevtransform(f̌; kind=1) ≈ exp.(p_1)
+            @test @inferred(ichebyshevtransform(f̌)) ≈ ichebyshevtransform!(copy(f̌)) ≈ exp.(p_1)
 
             f̃ = copy(f)
             f̄ = copy(f̌)
-            P = plan_chebyshevtransform(f; kind=1)
-            @test P*f == f̌
+            P = @inferred(plan_chebyshevtransform(f))
+            @test @inferred(P*f) == f̌
             @test f == f̃
-            P = plan_chebyshevtransform!(f; kind=1)
-            @test P*f == f̌
+            @test_throws ArgumentError P * T[1,2]
+            P = @inferred(plan_chebyshevtransform!(f))
+            @test @inferred(P*f) == f̌
             @test f == f̌
-            Pi = plan_ichebyshevtransform(f̌; kind=1)
-            @test Pi*f̌ ≈ f̃
+            @test_throws ArgumentError P * T[1,2]
+            Pi = @inferred(plan_ichebyshevtransform(f̌))
+            @test @inferred(Pi*f̌) ≈ f̃
             @test f̌ == f̄
-            Pi = plan_ichebyshevtransform!(f̌; kind=1)
-            @test Pi*f̌ ≈ f̃
+            @test_throws ArgumentError Pi * T[1,2]
+            Pi = @inferred(plan_ichebyshevtransform!(f̌))
+            @test @inferred(Pi*f̌) ≈ f̃
             @test f̌ ≈ f̃
+            @test_throws ArgumentError Pi * T[1,2]
 
-            @test chebyshevtransform(T[1]; kind=1) == T[1]
-            @test ichebyshevtransform(T[1]; kind=1) == T[1]
-            @test chebyshevtransform(T[]; kind=1) == T[]
-            @test ichebyshevtransform(T[]; kind=1) == T[]
+            @test chebyshevtransform(T[1]) == T[1]
+            @test ichebyshevtransform(T[1]) == T[1]
+            @test chebyshevtransform(T[]) == T[]
+            @test ichebyshevtransform(T[]) == T[]
         end
     end
     @testset "Chebyshev second kind points <-> first kind coefficients" begin
         for T in (Float32, Float64, ComplexF32, ComplexF64)
             n = 20
-            p_2 = chebyshevpoints(T, n, kind=2)
+            p_2 = chebyshevpoints(T, n, Val(2))
             f = exp.(p_2)
-            f̌ = chebyshevtransform(f; kind=2)
+            f̌ = @inferred(chebyshevtransform(f, Val(2)))
+            @test f̌ == chebyshevtransform!(copy(f), Val(2))
 
             f̃ = x -> [cos(k*acos(x)) for k=0:n-1]' * f̌
             @test f̃(0.1) ≈ exp(T(0.1))
-            @test ichebyshevtransform(f̌; kind=2) ≈ exp.(p_2)
+            @test @inferred(ichebyshevtransform(f̌, Val(2))) ≈ ichebyshevtransform!(copy(f̌), Val(2)) ≈ exp.(p_2)
 
-            P = plan_chebyshevtransform!(f; kind=2)
-            Pi = plan_ichebyshevtransform!(f; kind=2)
-            @test all((P \ copy(f)) .=== Pi * copy(f))
-            @test all((Pi \ copy(f̌)) .=== P * copy(f̌))
+            P = @inferred(plan_chebyshevtransform!(f, Val(2)))
+            Pi = @inferred(plan_ichebyshevtransform!(f, Val(2)))
+            @test all(@inferred(P \ copy(f)) .=== Pi * copy(f))
+            @test all(@inferred(Pi \ copy(f̌)) .=== P * copy(f̌))
             @test f ≈ P \ (P*copy(f)) ≈ P * (P\copy(f)) ≈ Pi \ (Pi*copy(f)) ≈ Pi * (Pi \ copy(f))
 
             f̃ = copy(f)
             f̄ = copy(f̌)
-            P = plan_chebyshevtransform(f; kind=2)
+            P = @inferred(plan_chebyshevtransform(f, Val(2)))
+            @test_throws ArgumentError P * T[1,2]
             @test P*f == f̌
             @test f == f̃
-            P = plan_chebyshevtransform!(f; kind=2)
+            P = @inferred(plan_chebyshevtransform!(f, Val(2)))
             @test P*f == f̌
             @test f == f̌
-            Pi = plan_ichebyshevtransform(f̌; kind=2)
+            @test_throws ArgumentError P * T[1,2]
+            Pi = @inferred(plan_ichebyshevtransform(f̌, Val(2)))
             @test Pi*f̌ ≈ f̃
             @test f̌ == f̄
-            Pi = plan_ichebyshevtransform!(f̌; kind=2)
+            @test_throws ArgumentError Pi * T[1,2]
+            Pi = @inferred(plan_ichebyshevtransform!(f̌, Val(2)))
             @test Pi*f̌ ≈ f̃
             @test f̌ ≈ f̃
+            @test_throws ArgumentError Pi * T[1,2]
 
-            @test chebyshevtransform(T[1]; kind=2) == T[1]
-            @test ichebyshevtransform(T[1]; kind=2) == T[1]
-            @test chebyshevtransform(T[]; kind=2) == T[]
-            @test ichebyshevtransform(T[]; kind=2) == T[]
+            @test_throws ArgumentError chebyshevtransform(T[1], Val(2))
+            @test_throws ArgumentError ichebyshevtransform(T[1], Val(2))
+            @test_throws ArgumentError chebyshevtransform(T[], Val(2))
+            @test_throws ArgumentError ichebyshevtransform(T[], Val(2))
         end
     end
 
     @testset "Chebyshev first kind points <-> second kind coefficients" begin
         for T in (Float32, Float64, ComplexF32, ComplexF64)
             n = 20
-            p_1 = chebyshevpoints(T, n, kind=1)
+            p_1 = chebyshevpoints(T, n)
             f = exp.(p_1)
-            f̌ = chebyshevutransform(f; kind=1)
+            f̌ = @inferred(chebyshevutransform(f))
 
             f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-1]' * f̌
             @test f̃(0.1) ≈ exp(T(0.1))
-            @test ichebyshevutransform(f̌; kind=1) ≈ exp.(p_1)
+            @test ichebyshevutransform(f̌) ≈ exp.(p_1)
 
             f̃ = copy(f)
             f̄ = copy(f̌)
-            P = plan_chebyshevutransform(f; kind=1)
+            P = @inferred(plan_chebyshevutransform(f))
             @test P*f == f̌
             @test f == f̃
-            P = plan_chebyshevutransform!(f; kind=1)
+            @test_throws ArgumentError P * T[1,2]
+            P = @inferred(plan_chebyshevutransform!(f))
             @test P*f == f̌
             @test f == f̌
-            Pi = plan_ichebyshevutransform(f̌; kind=1)
+            @test_throws ArgumentError P * T[1,2]
+            Pi = @inferred(plan_ichebyshevutransform(f̌))
             @test Pi*f̌ ≈ f̃
             @test f̌ == f̄
-            Pi = plan_ichebyshevutransform!(f̌; kind=1)
+            @test_throws ArgumentError Pi * T[1,2]
+            Pi = @inferred(plan_ichebyshevutransform!(f̌))
             @test Pi*f̌ ≈ f̃
             @test f̌ ≈ f̃
+            @test_throws ArgumentError Pi * T[1,2]
 
-            @test chebyshevutransform(T[1]; kind=1) == T[1]
-            @test ichebyshevutransform(T[1]; kind=1) == T[1]
-            @test chebyshevutransform(T[]; kind=1) == T[]
-            @test ichebyshevutransform(T[]; kind=1) == T[]
+            @test chebyshevutransform(T[1]) == T[1]
+            @test ichebyshevutransform(T[1]) == T[1]
+            @test chebyshevutransform(T[]) == T[]
+            @test ichebyshevutransform(T[]) == T[]
         end
     end
 
     @testset "Chebyshev second kind points <-> second kind coefficients" begin
         for T in (Float32, Float64, ComplexF32, ComplexF64)
             n = 20
-            p_2 = chebyshevpoints(T, n, kind=2)[2:end-1]
+            p_2 = chebyshevpoints(T, n, Val(2))[2:end-1]
             f = exp.(p_2)
-            f̌ = chebyshevutransform(f; kind=2)
+            f̌ = @inferred(chebyshevutransform(f, Val(2)))
 
             f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-3]' * f̌
             @test f̃(0.1) ≈ exp(T(0.1))
-            @test ichebyshevutransform(f̌; kind=2) ≈ exp.(p_2)
+            @test @inferred(ichebyshevutransform(f̌, Val(2))) ≈ exp.(p_2)
 
             f̃ = copy(f)
             f̄ = copy(f̌)
-            P = plan_chebyshevutransform(f; kind=2)
-            @test P*f == f̌
+            P = @inferred(plan_chebyshevutransform(f, Val(2)))
+            @test @inferred(P*f) == f̌
             @test f == f̃
-            P = plan_chebyshevutransform!(f; kind=2)
-            @test P*f == f̌
+            @test_throws ArgumentError P * T[1,2]
+            P = @inferred(plan_chebyshevutransform!(f, Val(2)))
+            @test @inferred(P*f) == f̌
             @test f == f̌
-            Pi = plan_ichebyshevutransform(f̌; kind=2)
-            @test Pi*f̌ ≈ f̃
+            @test_throws ArgumentError P * T[1,2]
+            Pi = @inferred(plan_ichebyshevutransform(f̌, Val(2)))
+            @test @inferred(Pi*f̌) ≈ f̃
             @test f̌ == f̄
-            Pi = plan_ichebyshevutransform!(f̌; kind=2)
-            @test Pi*f̌ ≈ f̃
+            @test_throws ArgumentError Pi * T[1,2]
+            Pi = @inferred(plan_ichebyshevutransform!(f̌, Val(2)))
+            @test @inferred(Pi*f̌) ≈ f̃
             @test f̌ ≈ f̃
+            @test_throws ArgumentError Pi * T[1,2]
 
-            @test chebyshevutransform(T[1]; kind=2) == T[1]
-            @test ichebyshevutransform(T[1]; kind=2) == T[1]
-            @test chebyshevutransform(T[]; kind=2) == T[]
-            @test ichebyshevutransform(T[]; kind=2) == T[]
+            @test_throws ArgumentError chebyshevutransform(T[1], Val(2))
+            @test_throws ArgumentError ichebyshevutransform(T[1], Val(2))
+            @test_throws ArgumentError chebyshevutransform(T[], Val(2))
+            @test_throws ArgumentError ichebyshevutransform(T[], Val(2))
         end
     end
+
+    @testset "matrix" begin
+        X = randn(1,1)
+        @test chebyshevtransform!(copy(X), Val(1)) == ichebyshevtransform!(copy(X), Val(1)) == X
+        @test_throws ArgumentError chebyshevtransform!(copy(X), Val(2))
+        @test_throws ArgumentError ichebyshevtransform!(copy(X), Val(2))
+
+        X = randn(10,11)
+        
+        # manual 2D Chebyshev transform on first-kind points: columns, then rows
+        X̌ = copy(X)
+        for j in axes(X̌,2)
+            chebyshevtransform!(view(X̌,:,j))
+        end
+        for k in axes(X̌,1)
+            chebyshevtransform!(view(X̌,k,:))
+        end
+        @test chebyshevtransform!(copy(X), Val(1)) ≈ X̌
+        @test ichebyshevtransform!(copy(X̌), Val(1)) ≈ X
+
+        # manual 2D Chebyshev transform on second-kind points: columns, then rows
+        X̌ = copy(X)
+        for j in axes(X̌,2)
+            chebyshevtransform!(view(X̌,:,j), Val(2))
+        end
+        for k in axes(X̌,1)
+            chebyshevtransform!(view(X̌,k,:), Val(2))
+        end
+        @test chebyshevtransform!(copy(X), Val(2)) ≈ X̌
+        @test ichebyshevtransform!(copy(X̌), Val(2)) ≈ X
+    end
+
+    @testset "Integer" begin
+        @test chebyshevtransform([1,2,3]) == chebyshevtransform([1.,2,3])
+        @test chebyshevtransform([1,2,3], Val(2)) == chebyshevtransform([1.,2,3], Val(2))
+        @test ichebyshevtransform([1,2,3]) == ichebyshevtransform([1.,2,3])
+        @test ichebyshevtransform([1,2,3], Val(2)) == ichebyshevtransform([1.,2,3], Val(2))
+        
+        @test chebyshevutransform([1,2,3]) == chebyshevutransform([1.,2,3])
+        @test chebyshevutransform([1,2,3], Val(2)) == chebyshevutransform([1.,2,3], Val(2))
+        @test ichebyshevutransform([1,2,3]) == ichebyshevutransform([1.,2,3])
+        @test ichebyshevutransform([1,2,3], Val(2)) == ichebyshevutransform([1.,2,3], Val(2))
+    end
 end
diff --git a/test/quadraturetests.jl b/test/quadraturetests.jl
index c79b346f..7956d7f0 100644
--- a/test/quadraturetests.jl
+++ b/test/quadraturetests.jl
@@ -4,7 +4,7 @@ import FastTransforms: chebyshevmoments1, chebyshevmoments2,
                        chebyshevjacobimoments1, chebyshevjacobimoments2,
                        chebyshevlogmoments1, chebyshevlogmoments2
 
-@testset "Fejér and Clenshaw--Curtis quadrature" begin
+@testset "Fejér and Clenshaw–Curtis quadrature" begin
     N = 20
     f = x -> exp(x)
 

From 9d3012cc8fdc156fa374359ddf7d882e88bd66aa Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Thu, 20 Aug 2020 13:30:12 +0100
Subject: [PATCH 028/222] Update chebyshev example

---
 examples/chebyshev.jl | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/examples/chebyshev.jl b/examples/chebyshev.jl
index 313467c5..1e3c9d7b 100644
--- a/examples/chebyshev.jl
+++ b/examples/chebyshev.jl
@@ -7,46 +7,46 @@ using FastTransforms
 
 # first kind points -> first kind polynomials
 n = 20
-p_1 = chebyshevpoints(Float64, n; kind=1)
+p_1 = chebyshevpoints(Float64, n, Val(1))
 f = exp.(p_1)
-f̌ = chebyshevtransform(f; kind=1)
+f̌ = chebyshevtransform(f, Val(1))
 
 f̃ = x -> [cos(k*acos(x)) for k=0:n-1]' * f̌
 f̃(0.1) ≈ exp(0.1)
 
 # first kind polynomials -> first kind points
-ichebyshevtransform(f̌; kind=1) ≈ exp.(p_1)
+ichebyshevtransform(f̌, Val(1)) ≈ exp.(p_1)
 
 # second kind points -> first kind polynomials
-p_2 = chebyshevpoints(Float64, n; kind=2)
+p_2 = chebyshevpoints(Float64, n, Val(2))
 f = exp.(p_2)
-f̌ = chebyshevtransform(f; kind=2)
+f̌ = chebyshevtransform(f, Val(2))
 
 f̃ = x -> [cos(k*acos(x)) for k=0:n-1]' * f̌
 f̃(0.1) ≈ exp(0.1)
 
 # first kind polynomials -> second kind points
-ichebyshevtransform(f̌; kind=2) ≈ exp.(p_2)
+ichebyshevtransform(f̌, Val(2)) ≈ exp.(p_2)
 
 
 # first kind points -> second kind polynomials
 n = 20
-p_1 = chebyshevpoints(Float64, n; kind=1)
+p_1 = chebyshevpoints(Float64, n, Val(1))
 f = exp.(p_1)
-f̌ = chebyshevutransform(f; kind=1)
+f̌ = chebyshevutransform(f, Val(1))
 f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-1]' * f̌
 f̃(0.1) ≈ exp(0.1)
 
 # second kind polynomials -> first kind points
-ichebyshevutransform(f̌; kind=1) ≈ exp.(p_1)
+ichebyshevutransform(f̌, Val(1)) ≈ exp.(p_1)
 
 
 # second kind points -> second kind polynomials
-p_2 = chebyshevpoints(Float64, n; kind=2)[2:n-1]
+p_2 = chebyshevpoints(Float64, n, Val(2))[2:n-1]
 f = exp.(p_2)
-f̌ = chebyshevutransform(f; kind=2)
+f̌ = chebyshevutransform(f, Val(2))
 f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-3]' * f̌
 f̃(0.1) ≈ exp(0.1)
 
 # second kind polynomials -> second kind points
-ichebyshevutransform(f̌; kind=2) ≈ exp.(p_2)
+ichebyshevutransform(f̌, Val(2)) ≈ exp.(p_2)

From 3ba4ee3b717598a7ed89fbe675e010c01c415bb9 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Wed, 26 Aug 2020 20:09:08 +0100
Subject: [PATCH 029/222] BigFloat chebyshev transform (#118)

* BigFloat Cheb transform

* v0.10.1

* add tests, comment out broken 2nd kind transform code
---
 Project.toml              |  2 +-
 src/chebyshevtransform.jl | 86 ++++++++++++++++++++++++++++++++++++++-
 src/libfasttransforms.jl  | 12 +++---
 test/chebyshevtests.jl    |  9 ++++
 4 files changed, 100 insertions(+), 9 deletions(-)

diff --git a/Project.toml b/Project.toml
index e014857d..a643642f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.10.0"
+version = "0.10.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 904fe69f..e07ed280 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -75,7 +75,7 @@ end
 *(P::ChebyshevTransformPlan{T,k,false}, x::AbstractVector{T}) where {T,k} = 
     ChebyshevTransformPlan{T,k,true}(P)*copy(x)
 
-chebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where T<:fftwNumber =
+chebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where T =
     plan_chebyshevtransform!(x, kind)*x
 
 
@@ -167,7 +167,7 @@ end
 *(P::IChebyshevTransformPlan{T,k,false},x::AbstractVector{T}) where {T,k} = 
     IChebyshevTransformPlan{T,k,true}(P)*copy(x)
 
-ichebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where {T<:fftwNumber} =
+ichebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where T =
     plan_ichebyshevtransform!(x, kind)*x
 
 ichebyshevtransform(x, kind=Val(1)) = ichebyshevtransform!(copy(x), kind)
@@ -433,3 +433,85 @@ chebyshevpoints(n::Integer, kind=Val(1)) = chebyshevpoints(Float64, n, kind)
 #     x = P.plan*x
 #     rmul!(x,half(T))
 # end
+
+
+###
+# BigFloat
+# Use `Nothing` and fall back to FFT
+###
+
+plan_chebyshevtransform(x::AbstractVector{T}, ::Val{kind}) where {T,kind} =
+    ChebyshevTransformPlan{T,kind,false,Nothing}()
+plan_ichebyshevtransform(x::AbstractVector{T}, ::Val{kind}) where {T,kind} =
+    IChebyshevTransformPlan{T,kind,false,Nothing}()
+
+plan_chebyshevtransform!(x::AbstractVector{T}, ::Val{kind}) where {T,kind} =
+    ChebyshevTransformPlan{T,kind,true,Nothing}()
+plan_ichebyshevtransform!(x::AbstractVector{T}, ::Val{kind}) where {T,kind} =
+    IChebyshevTransformPlan{T,kind,true,Nothing}()
+
+#following Chebfun's @Chebtech1/vals2coeffs.m and @Chebtech2/vals2coeffs.m
+function *(P::ChebyshevTransformPlan{T,1,false,Nothing}, x::AbstractVector{T}) where T
+    n = length(x)
+    if n == 1
+        x
+    else
+        w = [2exp(im*convert(T,π)*k/2n) for k=0:n-1]
+        ret = w.*ifft([x;reverse(x)])[1:n]
+        ret = T<:Real ? real(ret) : ret
+        ret[1] /= 2
+        ret
+    end
+end
+
+
+# function *(P::ChebyshevTransformPlan{T,2,false,Nothing}, x::AbstractVector{T}) where T
+#     n = length(x)
+#     if n == 1
+#         x
+#     else
+#         ret = ifft([x;x[end:-1:2]])[1:n]
+#         ret = T<:Real ? real(ret) : ret
+#         ret[2:n-1] *= 2
+#         ret
+#     end
+# end
+
+
+*(P::ChebyshevTransformPlan{T,1,true,Nothing}, x::AbstractVector{T}) where T =
+    copyto!(x, ChebyshevTransformPlan{T,1,false,Nothing}() * x)
+# *(P::ChebyshevTransformPlan{T,2,true,Nothing}, x::AbstractVector{T}) where T =
+#     copyto!(x, ChebyshevTransformPlan{T,2,false,Nothing}() * x)
+
+
+#following Chebfun's @Chebtech1/coeffs2vals.m and @Chebtech2/coeffs2vals.m
+function *(P::IChebyshevTransformPlan{T,1,false,Nothing}, x::AbstractVector{T}) where T
+    n = length(x)
+    if n == 1
+        x
+    else
+        w = [exp(-im*convert(T,π)*k/2n)/2 for k=0:2n-1]
+        w[1] *= 2;w[n+1] *= 0;w[n+2:end] *= -1
+        ret = fft(w.*[x;one(T);x[end:-1:2]])
+        ret = T<:Real ? real(ret) : ret
+        ret[1:n]
+    end
+end
+
+# function *(P::IChebyshevTransformPlan{T,2,true,Nothing}, x::AbstractVector{T}) where T
+#     n = length(x)
+#     if n == 1
+#         x
+#     else
+#         x[1] *= 2; x[end] *= 2
+#         chebyshevtransform!(x, Val(2))
+#         x[1] *= 2; x[end] *= 2
+#         lmul!(convert(T,n-1)/2, x)
+#         x
+#     end
+# end
+
+*(P::IChebyshevTransformPlan{T,1,true,Nothing}, x::AbstractVector{T}) where T =
+    copyto!(x, IChebyshevTransformPlan{T,1,false,Nothing}() * x)
+# *(P::IChebyshevTransformPlan{T,2,false,Nothing}, x::AbstractVector{T}) where T =
+#     IChebyshevTransformPlan{T,2,true,Nothing}() * copy(x)
\ No newline at end of file
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 32c9a959..23c36fef 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -595,12 +595,12 @@ function lmul!(p::FTPlan{Complex{Float64}, 2, SPINSPHEREANALYSIS}, x::Matrix{Com
     return x
 end
 
-*(p::FTPlan{T}, x::Array{T}) where T = lmul!(p, deepcopy(x))
-*(p::AdjointFTPlan{T}, x::Array{T}) where T = lmul!(p, deepcopy(x))
-*(p::TransposeFTPlan{T}, x::Array{T}) where T = lmul!(p, deepcopy(x))
-\(p::FTPlan{T}, x::Array{T}) where T = ldiv!(p, deepcopy(x))
-\(p::AdjointFTPlan{T}, x::Array{T}) where T = ldiv!(p, deepcopy(x))
-\(p::TransposeFTPlan{T}, x::Array{T}) where T = ldiv!(p, deepcopy(x))
+*(p::FTPlan{T}, x::AbstractArray{T}) where T = lmul!(p, Array(x))
+*(p::AdjointFTPlan{T}, x::AbstractArray{T}) where T = lmul!(p, Array(x))
+*(p::TransposeFTPlan{T}, x::AbstractArray{T}) where T = lmul!(p, Array(x))
+\(p::FTPlan{T}, x::AbstractArray{T}) where T = ldiv!(p, Array(x))
+\(p::AdjointFTPlan{T}, x::AbstractArray{T}) where T = ldiv!(p, Array(x))
+\(p::TransposeFTPlan{T}, x::AbstractArray{T}) where T = ldiv!(p, Array(x))
 
 *(p::FTPlan{T, 1}, x::UniformScaling{S}) where {T, S} = lmul!(p, Matrix{promote_type(T, S)}(x, p.n, p.n))
 *(p::AdjointFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = lmul!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n))
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 74ac1ada..87db2131 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -215,4 +215,13 @@ using FastTransforms, Test
         @test ichebyshevutransform([1,2,3]) == ichebyshevutransform([1.,2,3])
         @test ichebyshevutransform([1,2,3], Val(2)) == ichebyshevutransform([1.,2,3], Val(2))
     end
+
+    @testset "BigFloat" begin
+        x = BigFloat[1,2,3]
+        @test ichebyshevtransform(chebyshevtransform(x)) ≈ x
+        @test plan_chebyshevtransform(x)x ≈ chebyshevtransform(x)
+        @test plan_ichebyshevtransform(x)x ≈ ichebyshevtransform(x)
+        @test plan_chebyshevtransform!(x)copy(x) ≈ chebyshevtransform(x)
+        @test plan_ichebyshevtransform!(x)copy(x) ≈ ichebyshevtransform(x)
+    end
 end

From 96d1a2320d877c8e2ebb6ad1d8c2f69592deed68 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Sep 2020 11:11:05 -0500
Subject: [PATCH 030/222] try generating example docs using Literate.jl

---
 docs/Project.toml  |  2 ++
 docs/make.jl       | 19 +++++++++++--
 examples/sphere.jl | 68 ++++++++++++++++++++++++----------------------
 3 files changed, 55 insertions(+), 34 deletions(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index f2a273e5..110e6bb0 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,5 +1,7 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+FastTransforms = "057dd010-8810-581a-b7be-e3fc3b93f78c"
+Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
 
 [compat]
 Documenter = "~0.24"
diff --git a/docs/make.jl b/docs/make.jl
index 31eebc8d..7e2bf482 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,4 +1,16 @@
-using Documenter, FastTransforms
+using Documenter, FastTransforms, Literate
+
+const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
+const OUTPUT_DIR   = joinpath(@__DIR__, "src/generated")
+
+examples = [
+	"sphere.jl",
+]
+
+for example in examples
+	example_filepath = joinpath(EXAMPLES_DIR, example)
+	Literate.markdown(example_filepath, OUTPUT_DIR; execute=true)
+end
 
 makedocs(
 			doctest = false,
@@ -6,7 +18,10 @@ makedocs(
 			sitename = "FastTransforms.jl",
 			authors = "Richard Mikael Slevinsky",
 			pages = Any[
-					"Home" => "index.md"
+					"Home" => "index.md",
+					"Examples" => [
+        				"generated/sphere.md",
+        				],
 					]
 			)
 
diff --git a/examples/sphere.jl b/examples/sphere.jl
index 3a1a0953..594f303d 100644
--- a/examples/sphere.jl
+++ b/examples/sphere.jl
@@ -1,28 +1,30 @@
-#############
+# ## Spherical harmonic addition theorem
 # This example confirms numerically that
+# ```math
+# \frac{P_4(z\cdot y) - P_4(x\cdot y)}{z\cdot y - x\cdot y},
+# ```
 #
-#   [P₄(z⋅y) - P₄(x⋅y)]/(z⋅y - x⋅y),
-#
-# is actually a degree-3 polynomial on 𝕊², where P₄ is the degree-4
-# Legendre polynomial, and x,y,z ∈ 𝕊².
-# To verify, we sample the function on a 5×9 equiangular grid
+# is actually a degree-$3$ polynomial on $\mathbb{S}^2$, where $P_4$ is the degree-$4$
+# Legendre polynomial, and $x,y,z \in \mathbb{S}^2$.
+# To verify, we sample the function on a $5\times9$ equiangular grid
 # defined by:
-#
-#   θₙ = (n+1/2)π/N, for 0 ≤ n < N, and
-#
-#   φₘ = 2π m/M, for 0 ≤ m < M;
-#
+# ```math
+# \theta_n = (n+\frac{1}{2})\pi/N,\quad{\rm for}\quad 0\le n < N,\quad{\rm and}
+# ```
+# ```math
+# \varphi_m = 2\pi m/M,\quad{\rm for}\quad 0\le m < M;
+# ```
 # we convert the function samples to Fourier coefficients using
 # `plan_sph_analysis`; and finally, we transform
 # the Fourier coefficients to spherical harmonic coefficients using
 # `plan_sph2fourier`.
 #
 # In the basis of spherical harmonics, it is plain to see the
-# addition theorem in action, since P₄(x⋅y) should only consist of
-# exact-degree-4 harmonics.
+# addition theorem in action, since $P_4(x\cdot y)$ should only consist of
+# exact-degree-$4$ harmonics.
 #
-# For the storage pattern of the arrays, please consult the documentation.
-#############
+# For the storage pattern of the arrays, please consult the
+# [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
 
 function threshold!(A::AbstractArray, ϵ)
     for i in eachindex(A)
@@ -33,46 +35,50 @@ end
 
 using FastTransforms, LinearAlgebra
 
-# The colatitudinal grid (mod π):
+# The colatitudinal grid (mod $\pi$):
 N = 5
 θ = (0.5:N-0.5)/N
 
-# The longitudinal grid (mod π):
+# The longitudinal grid (mod $\pi$):
 M = 2*N-1
 φ = (0:M-1)*2/M
 
-# Arbitrarily, we place x at the North pole:
+# Arbitrarily, we place $x$ at the North pole:
 x = [0,0,1]
 
 # Another vector is completely free:
 y = normalize([.123,.456,.789])
 
-# Thus z ∈ 𝕊² is our variable vector, parameterized in spherical coordinates:
+# Thus $z \in \mathbb{S}^2$ is our variable vector, parameterized in spherical coordinates:
 z = (θ,φ) -> [sinpi(θ)*cospi(φ), sinpi(θ)*sinpi(φ), cospi(θ)]
 
-# The degree-4 Legendre polynomial is:
+# The degree-$4$ Legendre polynomial is:
 P4 = x -> (35*x^4-30*x^2+3)/8
 
 # On the tensor product grid, our function samples are:
 F = [(P4(z(θ,φ)⋅y) - P4(x⋅y))/(z(θ,φ)⋅y - x⋅y) for θ in θ, φ in φ]
 
+# We precompute a spherical harmonic--Fourier plan:
 P = plan_sph2fourier(F)
+
+# And an FFTW Fourier analysis plan on $\mathbb{S}^2$:
 PA = plan_sph_analysis(F)
 
-# Its spherical harmonic coefficients demonstrate that it is degree-3:
+# Its spherical harmonic coefficients demonstrate that it is degree-$3$:
 V = PA*F
 U3 = threshold!(P\V, 400*eps())
 
-# Similarly, on the tensor product grid, the Legendre polynomial P₄(z⋅y) is:
+# Similarly, on the tensor product grid, the Legendre polynomial $P_4(z\cdot y)$ is:
 F = [P4(z(θ,φ)⋅y) for θ in θ, φ in φ]
 
-# Its spherical harmonic coefficients demonstrate that it is exact-degree-4:
+# Its spherical harmonic coefficients demonstrate that it is exact-degree-$4$:
 V = PA*F
 U4 = threshold!(P\V, 3*eps())
 
-nrm1 = norm(U4);
+# The $L^2(\mathbb{S}^2)$ norm of the function is:
+nrm1 = norm(U4)
 
-# Finally, the Legendre polynomial P₄(z⋅x) is aligned with the grid:
+# Finally, the Legendre polynomial $P_4(z\cdot x)$ is aligned with the grid:
 F = [P4(z(θ,φ)⋅x) for θ in θ, φ in φ]
 
 # It only has one nonnegligible spherical harmonic coefficient.
@@ -80,16 +86,14 @@ F = [P4(z(θ,φ)⋅x) for θ in θ, φ in φ]
 V = PA*F
 U4 = threshold!(P\V, 3*eps())
 
-# That nonnegligible coefficient should be approximately √(2π/(4+1/2)),
+# That nonnegligible coefficient should be approximately `√(2π/(4+1/2))`,
 # since the convention in this library is to orthonormalize.
 
-nrm2 = norm(U4);
+nrm2 = norm(U4)
 
-# Note that the integrals of both functions P₄(z⋅y) and P₄(z⋅x) and their
-# L²(𝕊²) norms are the same because of rotational invariance. The integral of
+# Note that the integrals of both functions $P_4(z\cdot y)$ and $P_4(z\cdot x)$ and their
+# $L^2(\mathbb{S}^2)$ norms are the same because of rotational invariance. The integral of
 # either is perhaps not interesting as it is mathematically zero, but the norms
 # of either should be approximately the same.
 
-@show nrm1
-@show nrm2
-@show nrm1 ≈ nrm2
+nrm1 ≈ nrm2

From 783b6dc5df153531b033cd92b426a4b9faa265ff Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Sep 2020 11:18:01 -0500
Subject: [PATCH 031/222] fix link

---
 examples/sphere.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/sphere.jl b/examples/sphere.jl
index 594f303d..02b8d4f8 100644
--- a/examples/sphere.jl
+++ b/examples/sphere.jl
@@ -1,4 +1,4 @@
-# ## Spherical harmonic addition theorem
+# # Spherical harmonic addition theorem
 # This example confirms numerically that
 # ```math
 # \frac{P_4(z\cdot y) - P_4(x\cdot y)}{z\cdot y - x\cdot y},

From e885626e7b5e303877a9da9f8757a2229ff40151 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Sep 2020 14:24:33 -0500
Subject: [PATCH 032/222] convert remaining examples

remove stale references
add generated folder to gitignore
---
 .gitignore                    |  2 +-
 README.md                     | 16 ++-------
 docs/make.jl                  | 12 +++++++
 examples/chebyshev.jl         | 26 ++++++--------
 examples/disk.jl              | 47 +++++++++++++++----------
 examples/nonlocaldiffusion.jl | 57 ++++++++++++++++++++++--------
 examples/padua.jl             | 14 +++++---
 examples/sphere.jl            | 13 +++----
 examples/spinweighted.jl      | 40 +++++++++++----------
 examples/triangle.jl          | 66 +++++++++++++++++++++--------------
 10 files changed, 172 insertions(+), 121 deletions(-)

diff --git a/.gitignore b/.gitignore
index 77b0d9d8..0a0d3048 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
 docs/build/
-docs/site/
+docs/src/generated
 deps/build.log
 deps/libfasttransforms.*
 .DS_Store
diff --git a/README.md b/README.md
index 797a7abb..aa3f95c8 100644
--- a/README.md
+++ b/README.md
@@ -157,18 +157,8 @@ julia> @time norm(ipaduatransform(paduatransform(v)) - v)/norm(v)
 
 # References:
 
-   [1]  B. Alpert and V. Rokhlin. <a href="http://dx.doi.org/10.1137/0912009">A fast algorithm for the evaluation of Legendre expansions</a>, *SIAM J. Sci. Stat. Comput.*, **12**:158—179, 1991.
+   [1]  D. Ruiz—Antolín and A. Townsend. <a href="https://doi.org/10.1137/17M1134822">A nonuniform fast Fourier transform based on low rank approximation</a>, *SIAM J. Sci. Comput.*, **40**:A529–A547, 2018.
 
-   [2]  N. Hale and A. Townsend. <a href="http://dx.doi.org/10.1137/130932223">A fast, simple, and stable Chebyshev—Legendre transform using an asymptotic formula</a>, *SIAM J. Sci. Comput.*, **36**:A148—A167, 2014.
+   [2]  R. M. Slevinsky. <a href="https://doi.org/10.1016/j.acha.2017.11.001">Fast and backward stable transforms between spherical harmonic expansions and bivariate Fourier series</a>, *Appl. Comput. Harmon. Anal.*, **47**:585—606, 2019.
 
-   [3]  J. Keiner. <a href="http://dx.doi.org/10.1137/070703065">Computing with expansions in Gegenbauer polynomials</a>, *SIAM J. Sci. Comput.*, **31**:2151—2171, 2009.
-
-   [4]  D. Ruiz—Antolín and A. Townsend. <a href="https://arxiv.org/abs/1701.04492">A nonuniform fast Fourier transform based on low rank approximation</a>, arXiv:1701.04492, 2017.
-
-   [5]  R. M. Slevinsky. <a href="https://doi.org/10.1093/imanum/drw070">On the use of Hahn's asymptotic formula and stabilized recurrence for a fast, simple, and stable Chebyshev—Jacobi transform</a>, *IMA J. Numer. Anal.*, **38**:102—124, 2018.
-
-   [6]  R. M. Slevinsky. <a href="https://doi.org/10.1016/j.acha.2017.11.001">Fast and backward stable transforms between spherical harmonic expansions and bivariate Fourier series</a>, *Appl. Comput. Harmon. Anal.*, **47**:585—606, 2019.
-
-   [7]  R. M. Slevinsky, <a href="https://arxiv.org/abs/1711.07866">Conquering the pre-computation in two-dimensional harmonic polynomial transforms</a>, arXiv:1711.07866, 2017.
-
-   [8]  A. Townsend, M. Webb, and S. Olver. <a href="https://doi.org/10.1090/mcom/3277">Fast polynomial transforms based on Toeplitz and Hankel matrices</a>, in press at *Math. Comp.*, 2017.
+   [3]  R. M. Slevinsky, <a href="https://arxiv.org/abs/1711.07866">Conquering the pre-computation in two-dimensional harmonic polynomial transforms</a>, arXiv:1711.07866, 2017.
diff --git a/docs/make.jl b/docs/make.jl
index 7e2bf482..41486745 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -4,7 +4,13 @@ const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
 const OUTPUT_DIR   = joinpath(@__DIR__, "src/generated")
 
 examples = [
+	"chebyshev.jl",
+	"disk.jl",
+	"nonlocaldiffusion.jl",
+	"padua.jl",
 	"sphere.jl",
+	"spinweighted.jl",
+	"triangle.jl",
 ]
 
 for example in examples
@@ -20,7 +26,13 @@ makedocs(
 			pages = Any[
 					"Home" => "index.md",
 					"Examples" => [
+						"generated/chebyshev.md",
+						"generated/disk.md",
+						"generated/nonlocaldiffusion.md",
+						"generated/padua.md",
         				"generated/sphere.md",
+						"generated/spinweighted.md",
+						"generated/triangle.md",
         				],
 					]
 			)
diff --git a/examples/chebyshev.jl b/examples/chebyshev.jl
index 1e3c9d7b..5f553b09 100644
--- a/examples/chebyshev.jl
+++ b/examples/chebyshev.jl
@@ -1,52 +1,46 @@
-#############
+# # Chebyshev transform
 # This demonstrates the Chebyshev transform and inverse transform,
 # explaining precisely the normalization and points
-#############
 
 using FastTransforms
-
-# first kind points -> first kind polynomials
 n = 20
+
+# First kind points $\to$ first kind polynomials
 p_1 = chebyshevpoints(Float64, n, Val(1))
 f = exp.(p_1)
 f̌ = chebyshevtransform(f, Val(1))
-
 f̃ = x -> [cos(k*acos(x)) for k=0:n-1]' * f̌
 f̃(0.1) ≈ exp(0.1)
 
-# first kind polynomials -> first kind points
+# First kind polynomials $\to$ first kind points
 ichebyshevtransform(f̌, Val(1)) ≈ exp.(p_1)
 
-# second kind points -> first kind polynomials
+# Second kind points $\to$ first kind polynomials
 p_2 = chebyshevpoints(Float64, n, Val(2))
 f = exp.(p_2)
 f̌ = chebyshevtransform(f, Val(2))
-
 f̃ = x -> [cos(k*acos(x)) for k=0:n-1]' * f̌
 f̃(0.1) ≈ exp(0.1)
 
-# first kind polynomials -> second kind points
+# First kind polynomials $\to$ second kind points
 ichebyshevtransform(f̌, Val(2)) ≈ exp.(p_2)
 
-
-# first kind points -> second kind polynomials
-n = 20
+# First kind points $\to$ second kind polynomials
 p_1 = chebyshevpoints(Float64, n, Val(1))
 f = exp.(p_1)
 f̌ = chebyshevutransform(f, Val(1))
 f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-1]' * f̌
 f̃(0.1) ≈ exp(0.1)
 
-# second kind polynomials -> first kind points
+# Second kind polynomials $\to$ first kind points
 ichebyshevutransform(f̌, Val(1)) ≈ exp.(p_1)
 
-
-# second kind points -> second kind polynomials
+# Second kind points $\to$ second kind polynomials
 p_2 = chebyshevpoints(Float64, n, Val(2))[2:n-1]
 f = exp.(p_2)
 f̌ = chebyshevutransform(f, Val(2))
 f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-3]' * f̌
 f̃(0.1) ≈ exp(0.1)
 
-# second kind polynomials -> second kind points
+# Second kind polynomials $\to$ second kind points
 ichebyshevutransform(f̌, Val(2)) ≈ exp.(p_2)
diff --git a/examples/disk.jl b/examples/disk.jl
index 4a4b6c4d..3b91e6eb 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -1,47 +1,56 @@
-#############
+# # Holomorphic integration
 # In this example, we explore integration of a harmonic function:
-#
-#   f(x,y) = (x^2-y^2+1)/[(x^2-y^2+1)^2+(2xy+1)^2],
-#
+# ```math
+#   f(x,y) = \frac{x^2-y^2+1}{(x^2-y^2+1)^2+(2xy+1)^2},
+# ```
 # over the unit disk. In this case, we know from complex analysis that the
-# integral of a holomorphic function is equal to π × f(0,0).
-# We analyze the function on an N×M tensor product grid defined by:
-#
-#   rₙ = cos[(n+1/2)π/2N], for 0 ≤ n < N, and
-#
-#   θₘ = 2π m/M, for 0 ≤ m < M;
-#
+# integral of a holomorphic function is equal to $\pi \times f(0,0)$.
+# We analyze the function on an $N\times M$ tensor product grid defined by:
+# ```math
+# \begin{aligned}
+# r_n & = \cos\left[(n+\tfrac{1}{2})\pi/2N\right],\quad{\rm for}\quad 0\le n < N,\quad{\rm and}\\
+# \theta_m & = 2\pi m/M,\quad{\rm for}\quad 0\le m < M;
+# \end{aligned}
+# ```
 # we convert the function samples to Chebyshev×Fourier coefficients using
 # `plan_disk_analysis`; and finally, we transform the Chebyshev×Fourier
-# coefficients to disk harmonic coefficients using `plan_disk2cxf`.
+# coefficients to Zernike polynomial coefficients using `plan_disk2cxf`.
 #
-# For the storage pattern of the arrays, please consult the documentation.
-#############
+# For the storage pattern of the arrays, please consult the
+# [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
 
 using FastTransforms, LinearAlgebra
 
+# Our function $f$ on the disk:
 f = (x,y) -> (x^2-y^2+1)/((x^2-y^2+1)^2+(2x*y+1)^2)
 
+# The Zernike polynomial degree:
 N = 5
 M = 4N-3
 
+# The radial grid:
 r = [sinpi((N-n-0.5)/(2N)) for n in 0:N-1]
-θ = (0:M-1)*2/M # mod π.
+
+# The angular grid (mod $\pi$):
+θ = (0:M-1)*2/M
 
 # On the mapped tensor product grid, our function samples are:
 F = [f(r*cospi(θ), r*sinpi(θ)) for r in r, θ in θ]
 
+# We precompute a Zernike--Chebyshev×Fourier plan:
 P = plan_disk2cxf(F)
+
+# And an FFTW Chebyshev×Fourier analysis plan on the disk:
 PA = plan_disk_analysis(F)
 
 # Its Zernike coefficients are:
 U = P\(PA*F)
 
-# The Zernike coefficients are useful for integration. The integral of f(x,y)
-# over the disk should be π/2 by harmonicity. The coefficient of Z_0^0
-# multiplied by √π is:
+# The Zernike coefficients are useful for integration. The integral of $f(x,y)$
+# over the disk should be $\pi/2$ by harmonicity. The coefficient of $Z_0^0$
+# multiplied by `√π` is:
 U[1, 1]*sqrt(π)
 
-# Using an orthonormal basis, the integral of [f(x,y)]^2 over the disk is
+# Using an orthonormal basis, the integral of $[f(x,y)]^2$ over the disk is
 # approximately the square of the 2-norm of the coefficients:
 norm(U)^2
diff --git a/examples/nonlocaldiffusion.jl b/examples/nonlocaldiffusion.jl
index 102e97bf..9598e5c2 100644
--- a/examples/nonlocaldiffusion.jl
+++ b/examples/nonlocaldiffusion.jl
@@ -1,3 +1,39 @@
+# # Nonlocal diffusion on $\mathbb{S}^2$
+# This example calculates the spectrum of the nonlocal diffusion operator:
+# ```math
+# \mathcal{L}_\delta u = \int_{\mathbb{S}^2} \rho_\delta(|\mathbf{x}-\mathbf{y}|)\left[u(\mathbf{x}) - u(\mathbf{y})\right] \,\mathrm{d}\Omega(\mathbf{y}),
+# ```
+# defined in Eq. (2) of
+#
+# R. M. Slevinsky, H. Montanelli, and Q. Du, [A spectral method for nonlocal diffusion operators on the sphere](https://doi.org/10.1016/j.jcp.2018.06.024), *J. Comp. Phys.*, **372**:893--911, 2018.
+#
+# In the above, $0<\delta<2$, $-1<\alpha<1$, and the kernel:
+# ```math
+# \rho_\delta(|\mathbf{x}-\mathbf{y}|) = \frac{4(1+\alpha)}{\pi \delta^{2+2\alpha}} \frac{\chi_{[0,\delta]}(|\mathbf{x}-\mathbf{y}|)}{|\mathbf{x}-\mathbf{y}|^{2-2\alpha}},
+# ```
+# where $\chi_I(\cdot)$ is the indicator function on the set $I$.
+#
+# This nonlocal operator is diagonalized by spherical harmonics:
+# ```math
+# \mathcal{L}_\delta Y_\ell^m(\mathbf{x}) = \lambda_\ell(\alpha, \delta) Y_\ell^m(\mathbf{x}),
+# ```
+# and its eigenvalues are given by the generalized Funk--Hecke formula:
+# ```math
+# \lambda_\ell(\alpha, \delta) = \frac{(1+\alpha) 2^{2+\alpha}}{\delta^{2+2\alpha}}\int_{1-\delta^2/2}^1 \left[P_\ell(t)-1\right] (1-t)^{\alpha-1} \,\mathrm{d} t.
+# ```
+# In the paper, the authors use Clenshaw--Curtis quadrature and asymptotic evaluation of Legendre polynomials to achieve $\mathcal{O}(n^2\log n)$ complexity for the evaluation of the first $n$ eigenvalues. With a change of basis, this complexity can be reduced to $\mathcal{O}(n\log n)$.
+#
+# First, we represent:
+# ```math
+# P_n(t) - 1 = \sum_{j=0}^{n-1} \left[P_{j+1}(t) - P_j(t)\right] = -\sum_{j=0}^{n-1} (1-t) P_j^{(1,0)}(t).
+# ```
+# Then, we represent $P_j^{(1,0)}(t)$ with Jacobi polynomials $P_i^{(\alpha,0)}(t)$ and we integrate using [DLMF 18.9.16](https://dlmf.nist.gov/18.9.16):
+# ```math
+# \int_x^1 P_i^{(\alpha,0)}(t)(1-t)^\alpha\,\mathrm{d}t = \left\{ \begin{array}{cc} \frac{(1-x)^{\alpha+1}}{\alpha+1} & \mathrm{for~}i=0,\\ \frac{1}{2i}(1-x)^{\alpha+1}(1+x)P_{i-1}^{(\alpha+1,1)}(x), & \mathrm{for~}i>0.\end{array}\right.
+# ```
+# The code below implements this algorithm, making use of the Jacobi--Jacobi transform `plan_jac2jac`.
+# For numerical stability, the conversion from Jacobi polynomials $P_j^{(1,0)}(t)$ to $P_i^{(\alpha,0)}(t)$ is divided into conversion from $P_j^{(1,0)}(t)$ to $P_k^{(0,0)}(t)$, before conversion from $P_k^{(0,0)}(t)$ to $P_i^{(\alpha,0)}(t)$.
+
 using FastTransforms, LinearAlgebra
 
 function oprec!(n::Integer, v::AbstractVector, alpha::Real, delta2::Real)
@@ -13,19 +49,6 @@ function oprec!(n::Integer, v::AbstractVector, alpha::Real, delta2::Real)
     return v
 end
 
-"""
-This example calculates the spectrum of the nonlocal diffusion operator:
-
-```math
-ℒ_δ u = ∫_𝕊² ρ_δ(|𝐱-𝐲|)[u(𝐱) - u(𝐲)] dΩ(𝐲),
-```
-
-defined in Eq. (2) of
-
-    R. M. Slevinsky, H. Montanelli, and Q. Du, A spectral method for nonlocal diffusion operators on the sphere, J. Comp. Phys., 372:893--911, 2018.
-
-available at https://doi.org/10.1016/j.jcp.2018.06.024
-"""
 function evaluate_lambda(n::Integer, alpha::T, delta::T) where T
     delta2 = delta*delta
     scl = (1+alpha)*(2-delta2/2)
@@ -60,7 +83,11 @@ function evaluate_lambda(n::Integer, alpha::T, delta::T) where T
     return lambda
 end
 
-lambda = evaluate_lambda(1024, -0.5, 1.0)
-lambdabf = evaluate_lambda(1024, parse(BigFloat, "-0.5"), parse(BigFloat, "1.0"))
+# The spectrum in `Float64`:
+lambda = evaluate_lambda(10, -0.5, 1.0)
+
+# The spectrum in `BigFloat`:
+lambdabf = evaluate_lambda(10, parse(BigFloat, "-0.5"), parse(BigFloat, "1.0"))
 
+# The $\infty$-norm relative error:
 norm(lambda-lambdabf, Inf)/norm(lambda, Inf)
diff --git a/examples/padua.jl b/examples/padua.jl
index bdb9fdab..2945f273 100644
--- a/examples/padua.jl
+++ b/examples/padua.jl
@@ -1,16 +1,20 @@
-#############
+# # Padua transform
 # This demonstrates the Padua transform and inverse transform,
 # explaining precisely the normalization and points
-#############
 
 using FastTransforms
 
+# We define the Padua points and extract Cartesian components:
 N = 15
 pts = paduapoints(N)
-x = pts[:,1]; y = pts[:,2]
+x = pts[:,1];
+y = pts[:,2];
 
+# We take the Padua transform of the function:
 f = (x,y) -> exp(x + cos(y))
-f̌ = paduatransform(f.(x , y))
+f̌ = paduatransform(f.(x , y));
+
+# and use the coefficients to create an approximation to the function $f$:
 f̃ = (x,y) -> begin
     j = 1
     ret = 0.0
@@ -21,6 +25,8 @@ f̃ = (x,y) -> begin
     ret
 end
 
+# At a particular point, is the function well-approximated?
 f̃(0.1,0.2) ≈ f(0.1,0.2)
 
+# Does the inverse transform bring us back to the grid?
 ipaduatransform(f̌) ≈ f̃.(x,y)
diff --git a/examples/sphere.jl b/examples/sphere.jl
index 02b8d4f8..1044e070 100644
--- a/examples/sphere.jl
+++ b/examples/sphere.jl
@@ -1,18 +1,17 @@
 # # Spherical harmonic addition theorem
 # This example confirms numerically that
 # ```math
-# \frac{P_4(z\cdot y) - P_4(x\cdot y)}{z\cdot y - x\cdot y},
+# f(z) = \frac{P_4(z\cdot y) - P_4(x\cdot y)}{z\cdot y - x\cdot y},
 # ```
-#
 # is actually a degree-$3$ polynomial on $\mathbb{S}^2$, where $P_4$ is the degree-$4$
 # Legendre polynomial, and $x,y,z \in \mathbb{S}^2$.
 # To verify, we sample the function on a $5\times9$ equiangular grid
 # defined by:
 # ```math
-# \theta_n = (n+\frac{1}{2})\pi/N,\quad{\rm for}\quad 0\le n < N,\quad{\rm and}
-# ```
-# ```math
-# \varphi_m = 2\pi m/M,\quad{\rm for}\quad 0\le m < M;
+# \begin{aligned}
+# \theta_n & = (n+\tfrac{1}{2})\pi/N,\quad{\rm for}\quad 0\le n < N,\quad{\rm and}\\
+# \varphi_m & = 2\pi m/M,\quad{\rm for}\quad 0\le m < M;
+# \end{aligned}
 # ```
 # we convert the function samples to Fourier coefficients using
 # `plan_sph_analysis`; and finally, we transform
@@ -88,12 +87,10 @@ U4 = threshold!(P\V, 3*eps())
 
 # That nonnegligible coefficient should be approximately `√(2π/(4+1/2))`,
 # since the convention in this library is to orthonormalize.
-
 nrm2 = norm(U4)
 
 # Note that the integrals of both functions $P_4(z\cdot y)$ and $P_4(z\cdot x)$ and their
 # $L^2(\mathbb{S}^2)$ norms are the same because of rotational invariance. The integral of
 # either is perhaps not interesting as it is mathematically zero, but the norms
 # of either should be approximately the same.
-
 nrm1 ≈ nrm2
diff --git a/examples/spinweighted.jl b/examples/spinweighted.jl
index 344a0785..7c07476c 100644
--- a/examples/spinweighted.jl
+++ b/examples/spinweighted.jl
@@ -1,28 +1,28 @@
-#############
+# # Spin-weighted spherical harmonics
 # This example plays with analysis of:
+# ```math
+# f(r) = e^{{\rm i} k\cdot r},
+# ```
+# for some $k\in\mathbb{R}^3$ and where $r\in\mathbb{S}^2$, using spin-$0$ spherical harmonics.
 #
-#   f(r) = e^{i k⋅r},
+# It applies $\dh$, the spin-raising operator,
+# both on the spin-$0$ coefficients as well as the original function,
+# followed by a spin-$1$ analysis to compare coefficients.
 #
-# for some k ∈ ℝ³ and where r ∈ 𝕊², using spin-0 spherical harmonics.
-#
-# It applies ð, the spin-raising operator,
-# both on the spin-0 coefficients as well as the original function,
-# followed by a spin-1 analysis to compare coefficients.
-#
-# See also sphere.jl
-# For the storage pattern of the arrays, please consult the documentation.
-#############
+# For the storage pattern of the arrays, please consult the
+# [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
 
 using FastTransforms, LinearAlgebra
 
-# The colatitudinal grid (mod π):
+# The colatitudinal grid (mod $\pi$):
 N = 10
 θ = (0.5:N-0.5)/N
 
-# The longitudinal grid (mod π):
+# The longitudinal grid (mod $\pi$):
 M = 2*N-1
 φ = (0:M-1)*2/M
 
+# Our choice of $k$ and angular parametrization of $r$:
 k = [2/7, 3/7, 6/7]
 r = (θ,φ) -> [sinpi(θ)*cospi(φ), sinpi(θ)*sinpi(φ), cospi(θ)]
 
@@ -30,17 +30,19 @@ r = (θ,φ) -> [sinpi(θ)*cospi(φ), sinpi(θ)*sinpi(φ), cospi(θ)]
 
 F = [exp(im*(k⋅r(θ,φ))) for θ in θ, φ in φ]
 
+# We precompute a spin-$0$ spherical harmonic--Fourier plan:
 P = plan_spinsph2fourier(F, 0)
+
+# And an FFTW Fourier analysis plan on $\mathbb{S}^2$:
 PA = plan_spinsph_analysis(F, 0)
 
 # Its spin-0 spherical harmonic coefficients are:
-
 U⁰ = P\(PA*F)
 
+# We can check its $L^2(\mathbb{S}^2)$ norm against an exact result:
 norm(U⁰) ≈ sqrt(4π)
 
-# Spin can be incremented by applying ð, either on the spin-0 coefficients:
-
+# Spin can be incremented by applying $\dh$, either on the spin-$0$ coefficients:
 U¹c = zero(U⁰)
 for n in 1:N-1
     U¹c[n, 1] = sqrt(n*(n+1))*U⁰[n+1, 1]
@@ -52,13 +54,13 @@ for m in 1:M÷2
     end
 end
 
-# or on the original function through analysis with spin-1 spherical harmonics:
-
+# or on the original function through analysis with spin-$1$ spherical harmonics:
 F = [-(k[1]*(im*cospi(θ)*cospi(φ) + sinpi(φ)) + k[2]*(im*cospi(θ)*sinpi(φ)-cospi(φ)) - im*k[3]*sinpi(θ))*exp(im*(k⋅r(θ,φ))) for θ in θ, φ in φ]
 
+# We change plans with spin-$1$ now and reanalyze:
 P = plan_spinsph2fourier(F, 1)
 PA = plan_spinsph_analysis(F, 1)
-
 U¹s = P\(PA*F)
 
+# Finally, we check $L^2(\mathbb{S}^2)$ norms against another exact result:
 norm(U¹c) ≈ norm(U¹s) ≈ sqrt(8π/3*(k⋅k))
diff --git a/examples/triangle.jl b/examples/triangle.jl
index e0879fc3..32971b14 100644
--- a/examples/triangle.jl
+++ b/examples/triangle.jl
@@ -1,67 +1,79 @@
-#############
+# # Calculus on the reference triangle
 # In this example, we sample a bivariate function:
-#
-#   f(x,y) = 1/(1+x^2+y^2),
-#
-# on the reference triangle with vertices (0,0), (0,1), and (1,0) and analyze it
+# ```math
+# f(x,y) = \frac{1}{1+x^2+y^2},
+# ```
+# on the reference triangle with vertices $(0,0)$, $(0,1)$, and $(1,0)$ and analyze it
 # in a Proriol series. Then, we find Proriol series for each component of its
 # gradient by term-by-term differentiation of our expansion, and we compare them
 # with the true Proriol series by sampling an exact expression for the gradient.
 #
-# We analyze f(x,y) on an N×M mapped tensor product grid defined by:
-#
-#   x = (1+u)/2, and y = (1-u)*(1+v)/4, where:
-#
-#   uₙ = cos[(n+1/2)π/N], for 0 ≤ n < N, and
-#
-#   vₘ = cos[(m+1/2)π/M], for 0 ≤ m < M;
-#
+# We analyze $f(x,y)$ on an $N\times M$ mapped tensor product grid defined by:
+# ```math
+# \begin{aligned}
+# x & = (1+u)/2,\quad{\rm and}\quad y = (1-u)*(1+v)/4,\quad {\rm where:}\\
+# u_n & = \cos\left[(n+\tfrac{1}{2})\pi/N\right],\quad{\rm for}\quad 0\le n < N,\quad{\rm and}\\
+# v_m & = \cos\left[(m+\tfrac{1}{2})\pi/M\right],\quad{\rm for}\quad 0\le m < M;
+# \end{aligned}
+# ```
 # we convert the function samples to mapped Chebyshev² coefficients using
 # `plan_tri_analysis`; and finally, we transform the mapped Chebyshev²
 # coefficients to Proriol coefficients using `plan_tri2cheb`.
 #
-# For the storage pattern of the arrays, please consult the documentation.
-#############
+# For the storage pattern of the arrays, please consult the
+# [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
 
 using FastTransforms, LinearAlgebra
 
+# Our function $f$ and the Cartesian components of its gradient:
 f = (x,y) -> 1/(1+x^2+y^2)
 fx = (x,y) -> -2x/(1+x^2+y^2)^2
 fy = (x,y) -> -2y/(1+x^2+y^2)^2
 
+# The polynomial degree:
 N = 10
 M = N
 
+# The parameters of the Proriol series:
 α, β, γ = 0, 0, 0
 
+# The $u$ grid:
 u = [sinpi((N-2n-1)/(2N)) for n in 0:N-1]
+
+# And the $v$ grid:
 v = [sinpi((M-2m-1)/(2M)) for m in 0:M-1]
 
-# Instead of using the u, v grid, we use one with more accuracy near the origin.
+# Instead of using the $u\times v$ grid, we use one with more accuracy near the origin.
+# Defining $x$ by:
 x = [sinpi((2N-2n-1)/(4N))^2 for n in 0:N-1]
+
+# And $w$ by:
 w = [sinpi((2M-2m-1)/(4M))^2 for m in 0:M-1]
 
-(1 .+ u)./2 ≈ x
-(1 .- u).*(1 .+ v')/4 ≈ reverse(x).*w'
+# We see how the two grids are related:
+((1 .+ u)./2 ≈ x) * ((1 .- u).*(1 .+ v')/4 ≈ reverse(x).*w')
 
 # On the mapped tensor product grid, our function samples are:
 F = [f(x[n+1], x[N-n]*w[m+1]) for n in 0:N-1, m in 0:M-1]
 
+# We precompute a Proriol--Chebyshev² plan:
 P = plan_tri2cheb(F, α, β, γ)
+
+# And an FFTW Chebyshev² plan on the triangle:
 PA = plan_tri_analysis(F)
 
-# Its Proriol-(α,β,γ) coefficients are:
+# Its Proriol-$(α,β,γ)$ coefficients are:
 U = P\(PA*F)
 
 # Similarly, our function's gradient samples are:
 Fx = [fx(x[n+1], x[N-n]*w[m+1]) for n in 0:N-1, m in 0:M-1]
+
+# and:
 Fy = [fy(x[n+1], x[N-n]*w[m+1]) for n in 0:N-1, m in 0:M-1]
 
-# For the partial derivative with respect to x, Olver et al.
+# For the partial derivative with respect to $x$, [Olver et al.](https://doi.org/10.1137/19M1245888)
 # derive simple expressions for the representation of this component
-# using a Proriol-(α+1,β,γ+1) series. For the partial derivative with respect
-# to y, the analogous formulae result in a Proriol-(α,β+1,γ+1) series.
-# These expressions are adapted from https://arxiv.org/abs/1902.04863.
+# using a Proriol-$(α+1,β,γ+1)$ series.
 Gx = zeros(Float64, N, M)
 for m = 0:M-2
     for n = 0:N-2
@@ -73,6 +85,8 @@ end
 Px = plan_tri2cheb(Fx, α+1, β, γ+1)
 Ux = Px\(PA*Fx)
 
+# For the partial derivative with respect to $y$, the analogous formulae result
+# in a Proriol-$(α,β+1,γ+1)$ series.
 Gy = zeros(Float64, N, M)
 for m = 0:M-2
     for n = 0:N-2
@@ -82,8 +96,8 @@ end
 Py = plan_tri2cheb(Fy, α, β+1, γ+1)
 Uy = Py\(PA*Fy)
 
-# The 2-norm relative error in differentiating the Proriol series
-# for f(x,y) term-by-term and its sampled gradient is:
+# The $2$-norm relative error in differentiating the Proriol series
+# for $f(x,y)$ term-by-term and its sampled gradient is:
 hypot(norm(Ux-Gx), norm(Uy-Gy))/hypot(norm(Ux), norm(Uy))
 
-# This error can be improved upon by increasing N and M.
+# This error can be improved upon by increasing $N$ and $M$.

From f492e4b4decd3ed6809f97f45366f5c9254526fa Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Sep 2020 14:35:50 -0500
Subject: [PATCH 033/222] fix disk header

---
 examples/disk.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/disk.jl b/examples/disk.jl
index 3b91e6eb..c5f1b808 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -1,4 +1,4 @@
-# Holomorphic integration
+# # Holomorphic integration on the unit disk
 # In this example, we explore integration of a harmonic function:
 # ```math
 #   f(x,y) = \frac{x^2-y^2+1}{(x^2-y^2+1)^2+(2xy+1)^2},

From ddad8ff9e4eda700063d8706acf1faadb34f43e7 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Sep 2020 15:43:52 -0500
Subject: [PATCH 034/222] cosmetic changes

---
 examples/disk.jl         | 2 +-
 examples/padua.jl        | 8 +++++---
 examples/spinweighted.jl | 6 +++---
 examples/triangle.jl     | 2 +-
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/examples/disk.jl b/examples/disk.jl
index c5f1b808..599feb98 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -8,7 +8,7 @@
 # We analyze the function on an $N\times M$ tensor product grid defined by:
 # ```math
 # \begin{aligned}
-# r_n & = \cos\left[(n+\tfrac{1}{2})\pi/2N],\quad{\rm for} 0\le n < N,\quad{\rm and}\\
+# r_n & = \cos\left[(n+\tfrac{1}{2})\pi/2N\right],\quad{\rm for} 0\le n < N,\quad{\rm and}\\
 # \theta_m & = 2\pi m/M,\quad{\rm for}\quad 0\le m < M;
 # \end{aligned}
 # ```
diff --git a/examples/padua.jl b/examples/padua.jl
index 2945f273..a126e6f8 100644
--- a/examples/padua.jl
+++ b/examples/padua.jl
@@ -7,12 +7,14 @@ using FastTransforms
 # We define the Padua points and extract Cartesian components:
 N = 15
 pts = paduapoints(N)
-x = pts[:,1];
-y = pts[:,2];
+x = pts[:,1]
+y = pts[:,2]
+nothing #hide
 
 # We take the Padua transform of the function:
 f = (x,y) -> exp(x + cos(y))
-f̌ = paduatransform(f.(x , y));
+f̌ = paduatransform(f.(x , y))
+nothing #hide
 
 # and use the coefficients to create an approximation to the function $f$:
 f̃ = (x,y) -> begin
diff --git a/examples/spinweighted.jl b/examples/spinweighted.jl
index 7c07476c..2720543e 100644
--- a/examples/spinweighted.jl
+++ b/examples/spinweighted.jl
@@ -5,7 +5,7 @@
 # ```
 # for some $k\in\mathbb{R}^3$ and where $r\in\mathbb{S}^2$, using spin-$0$ spherical harmonics.
 #
-# It applies $\dh$, the spin-raising operator,
+# It applies ð, the spin-raising operator,
 # both on the spin-$0$ coefficients as well as the original function,
 # followed by a spin-$1$ analysis to compare coefficients.
 #
@@ -36,13 +36,13 @@ P = plan_spinsph2fourier(F, 0)
 # And an FFTW Fourier analysis plan on $\mathbb{S}^2$:
 PA = plan_spinsph_analysis(F, 0)
 
-# Its spin-0 spherical harmonic coefficients are:
+# Its spin-$0$ spherical harmonic coefficients are:
 U⁰ = P\(PA*F)
 
 # We can check its $L^2(\mathbb{S}^2)$ norm against an exact result:
 norm(U⁰) ≈ sqrt(4π)
 
-# Spin can be incremented by applying $\dh$, either on the spin-$0$ coefficients:
+# Spin can be incremented by applying ð, either on the spin-$0$ coefficients:
 U¹c = zero(U⁰)
 for n in 1:N-1
     U¹c[n, 1] = sqrt(n*(n+1))*U⁰[n+1, 1]
diff --git a/examples/triangle.jl b/examples/triangle.jl
index 32971b14..8b2afff5 100644
--- a/examples/triangle.jl
+++ b/examples/triangle.jl
@@ -11,7 +11,7 @@
 # We analyze $f(x,y)$ on an $N\times M$ mapped tensor product grid defined by:
 # ```math
 # \begin{aligned}
-# x & = (1+u)/2,\quad{\rm and}\quad y = (1-u)*(1+v)/4,\quad {\rm where:}\\
+# x & = (1+u)/2,\quad{\rm and}\quad y = (1-u)(1+v)/4,\quad {\rm where:}\\
 # u_n & = \cos\left[(n+\tfrac{1}{2})\pi/N\right],\quad{\rm for}\quad 0\le n < N,\quad{\rm and}\\
 # v_m & = \cos\left[(m+\tfrac{1}{2})\pi/M\right],\quad{\rm for}\quad 0\le m < M;
 # \end{aligned}

From c68fceb0169d7736a29672c278513bb018c4b608 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Sep 2020 15:56:48 -0500
Subject: [PATCH 035/222] one more \quad

---
 examples/disk.jl  | 2 +-
 examples/padua.jl | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/examples/disk.jl b/examples/disk.jl
index 599feb98..019d2ae6 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -8,7 +8,7 @@
 # We analyze the function on an $N\times M$ tensor product grid defined by:
 # ```math
 # \begin{aligned}
-# r_n & = \cos\left[(n+\tfrac{1}{2})\pi/2N\right],\quad{\rm for} 0\le n < N,\quad{\rm and}\\
+# r_n & = \cos\left[(n+\tfrac{1}{2})\pi/2N\right],\quad{\rm for}\quad 0\le n < N,\quad{\rm and}\\
 # \theta_m & = 2\pi m/M,\quad{\rm for}\quad 0\le m < M;
 # \end{aligned}
 # ```
diff --git a/examples/padua.jl b/examples/padua.jl
index a126e6f8..91655ce4 100644
--- a/examples/padua.jl
+++ b/examples/padua.jl
@@ -8,13 +8,11 @@ using FastTransforms
 N = 15
 pts = paduapoints(N)
 x = pts[:,1]
-y = pts[:,2]
-nothing #hide
+y = pts[:,2];
 
 # We take the Padua transform of the function:
 f = (x,y) -> exp(x + cos(y))
-f̌ = paduatransform(f.(x , y))
-nothing #hide
+f̌ = paduatransform(f.(x , y));
 
 # and use the coefficients to create an approximation to the function $f$:
 f̃ = (x,y) -> begin

From 4a076c9fbacfe5201124cfe3b1d7002dbb0a8c7b Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 14 Sep 2020 11:00:48 -0500
Subject: [PATCH 036/222] fix tabs

---
 docs/make.jl | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index 41486745..c896ba94 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -4,38 +4,38 @@ const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
 const OUTPUT_DIR   = joinpath(@__DIR__, "src/generated")
 
 examples = [
-	"chebyshev.jl",
-	"disk.jl",
-	"nonlocaldiffusion.jl",
-	"padua.jl",
-	"sphere.jl",
-	"spinweighted.jl",
-	"triangle.jl",
+    "chebyshev.jl",
+    "disk.jl",
+    "nonlocaldiffusion.jl",
+    "padua.jl",
+    "sphere.jl",
+    "spinweighted.jl",
+    "triangle.jl",
 ]
 
 for example in examples
-	example_filepath = joinpath(EXAMPLES_DIR, example)
-	Literate.markdown(example_filepath, OUTPUT_DIR; execute=true)
+    example_filepath = joinpath(EXAMPLES_DIR, example)
+    Literate.markdown(example_filepath, OUTPUT_DIR; execute=true)
 end
 
 makedocs(
-			doctest = false,
-			format = Documenter.HTML(),
-			sitename = "FastTransforms.jl",
-			authors = "Richard Mikael Slevinsky",
-			pages = Any[
-					"Home" => "index.md",
-					"Examples" => [
-						"generated/chebyshev.md",
-						"generated/disk.md",
-						"generated/nonlocaldiffusion.md",
-						"generated/padua.md",
-        				"generated/sphere.md",
-						"generated/spinweighted.md",
-						"generated/triangle.md",
-        				],
-					]
-			)
+            doctest = false,
+            format = Documenter.HTML(),
+            sitename = "FastTransforms.jl",
+            authors = "Richard Mikael Slevinsky",
+            pages = Any[
+                    "Home" => "index.md",
+                    "Examples" => [
+                        "generated/chebyshev.md",
+                        "generated/disk.md",
+                        "generated/nonlocaldiffusion.md",
+                        "generated/padua.md",
+                        "generated/sphere.md",
+                        "generated/spinweighted.md",
+                        "generated/triangle.md",
+                        ],
+                    ]
+        )
 
 
 deploydocs(

From da2fb36afeb50770d4d969d718b963df2d174b41 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 2 Oct 2020 09:58:30 -0500
Subject: [PATCH 037/222] migrate to Travis.com

---
 Project.toml | 2 +-
 README.md    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index a643642f..55abb413 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.10.1"
+version = "0.10.2"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/README.md b/README.md
index aa3f95c8..f0e55a9a 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # FastTransforms.jl
 
-[![Build Status](https://github.com/JuliaApproximation/FastTransforms.jl/workflows/CI/badge.svg)](https://github.com/JuliaApproximation/FastTransforms.jl/actions?query=workflow%3ACI) [![Travis](https://travis-ci.org/JuliaApproximation/FastTransforms.jl.svg?branch=master)](https://travis-ci.org/JuliaApproximation/FastTransforms.jl) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/dev)
+[![Build Status](https://github.com/JuliaApproximation/FastTransforms.jl/workflows/CI/badge.svg)](https://github.com/JuliaApproximation/FastTransforms.jl/actions?query=workflow%3ACI) [![Travis](https://travis-ci.com/JuliaApproximation/FastTransforms.jl.svg?branch=master)](https://travis-ci.com/JuliaApproximation/FastTransforms.jl) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/dev)
 
 `FastTransforms.jl` allows the user to conveniently work with orthogonal polynomials with degrees well into the millions.
 

From 8a000df9e2381f414fece33924bc7ac00b059570 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Thu, 22 Oct 2020 20:07:47 +0100
Subject: [PATCH 038/222] Support FillArrays v0.10 (#122)

---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 55abb413..c32c3a78 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.10.2"
+version = "0.10.3"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -26,7 +26,7 @@ DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.3.3"
-FillArrays = "0.8, 0.9"
+FillArrays = "0.8, 0.9, 0.10"
 Reexport = "0.2"
 SpecialFunctions = "0.8, 0.9, 0.10"
 ToeplitzMatrices = "0.6"

From 474131c80be3b1c804c504d2fb9bd45381d3155d Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 25 Nov 2020 13:57:00 -0600
Subject: [PATCH 039/222] [FastTransforms] v0.4 (#125)

* let the build script point to master

add wrapper and examples

* add extra conversions from NTuple{*, Float64}

* rename ZY_ to yz_

* update disk2cxf, add rectdisk2cheb

* remove nightly tests

* remove unnecessary parentheses
---
 .github/workflows/ci.yml        |   1 -
 Project.toml                    |  12 +--
 README.md                       |   2 +-
 deps/build.jl                   |   7 +-
 examples/disk.jl                |  56 +++++++++++++-
 examples/sphericalisometries.jl | 124 ++++++++++++++++++++++++++++++
 src/FastTransforms.jl           |   8 +-
 src/libfasttransforms.jl        | 131 +++++++++++++++++++++++++++-----
 src/specialfunctions.jl         |   5 ++
 test/libfasttransformstests.jl  |  15 +++-
 10 files changed, 319 insertions(+), 42 deletions(-)
 create mode 100644 examples/sphericalisometries.jl

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 880f7f81..223528e7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,7 +11,6 @@ jobs:
         version:
           - '1.3'
           - '1.5'
-          - 'nightly'
         os:
           - ubuntu-latest
           - macOS-latest
diff --git a/Project.toml b/Project.toml
index c32c3a78..190d423a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.10.3"
+version = "0.11.0"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -19,15 +19,15 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
-AbstractFFTs = "0.4, 0.5"
-ArrayLayouts = "0.3.7, 0.4"
+AbstractFFTs = "0.5"
+ArrayLayouts = "0.4"
 BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.3.3"
-FillArrays = "0.8, 0.9, 0.10"
+FastTransforms_jll = "0.4.0"
+FillArrays = "0.9, 0.10"
 Reexport = "0.2"
-SpecialFunctions = "0.8, 0.9, 0.10"
+SpecialFunctions = "0.10, 1"
 ToeplitzMatrices = "0.6"
 julia = "1.3"
diff --git a/README.md b/README.md
index f0e55a9a..025ffd38 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ julia> using FastTransforms, LinearAlgebra
 
 ## Fast orthogonal polynomial transforms
 
-The 29 orthogonal polynomial transforms are listed in `FastTransforms.kind2string.(0:28)`. Univariate transforms may be planned with the standard normalization or with orthonormalization. For multivariate transforms, the standard normalization may be too severe for floating-point computations, so it is omitted. Here are two examples:
+The 33 orthogonal polynomial transforms are listed in `FastTransforms.kind2string.(0:32)`. Univariate transforms may be planned with the standard normalization or with orthonormalization. For multivariate transforms, the standard normalization may be too severe for floating-point computations, so it is omitted. Here are two examples:
 
 ### The Chebyshev--Legendre transform
 
diff --git a/deps/build.jl b/deps/build.jl
index e136faaf..38cd3766 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -1,8 +1,6 @@
 using BinaryProvider
 import Libdl
 
-version = v"0.3.3"
-
 const extension = Sys.isapple() ? "dylib" : Sys.islinux() ? "so" : Sys.iswindows() ? "dll" : ""
 
 print_error() = error(
@@ -26,10 +24,11 @@ if ft_build_from_source == "true"
         if [ -d "FastTransforms" ]; then
             cd FastTransforms
             git fetch
-            git checkout v$version
+            git checkout master
+            git pull
             cd ..
         else
-            git clone -b v$version https://github.com/MikaelSlevinsky/FastTransforms.git FastTransforms
+            git clone https://github.com/MikaelSlevinsky/FastTransforms.git FastTransforms
         fi
         cd FastTransforms
         $make assembly
diff --git a/examples/disk.jl b/examples/disk.jl
index 019d2ae6..e4ce0233 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -37,8 +37,9 @@ r = [sinpi((N-n-0.5)/(2N)) for n in 0:N-1]
 # On the mapped tensor product grid, our function samples are:
 F = [f(r*cospi(θ), r*sinpi(θ)) for r in r, θ in θ]
 
-# We precompute a Zernike--Chebyshev×Fourier plan:
-P = plan_disk2cxf(F)
+# We precompute a (generalized) Zernike--Chebyshev×Fourier plan:
+α, β = 0, 0
+P = plan_disk2cxf(F, α, β)
 
 # And an FFTW Chebyshev×Fourier analysis plan on the disk:
 PA = plan_disk_analysis(F)
@@ -47,10 +48,57 @@ PA = plan_disk_analysis(F)
 U = P\(PA*F)
 
 # The Zernike coefficients are useful for integration. The integral of $f(x,y)$
-# over the disk should be $\pi/2$ by harmonicity. The coefficient of $Z_0^0$
+# over the disk should be $\pi/2$ by harmonicity. The coefficient of $Z_{0,0}$
 # multiplied by `√π` is:
 U[1, 1]*sqrt(π)
 
 # Using an orthonormal basis, the integral of $[f(x,y)]^2$ over the disk is
 # approximately the square of the 2-norm of the coefficients:
-norm(U)^2
+norm(U)^2, π/(2*sqrt(2))*log1p(sqrt(2))
+
+# But there's more! Next, we repeat the experiment using the Dunkl-Xu
+# orthonormal polynomials supported on the rectangularized disk.
+N = 2N
+M = N
+
+# We analyze the function on an $N\times M$ mapped tensor product $xy$-grid defined by:
+# ```math
+# \begin{aligned}
+# x_n & = \cos\left(\frac{2n+1}{2N}\pi\right) = \sin\left(\frac{N-2n-1}{2N}\pi\right),\quad {\rm for} \quad 0 \le n < N,\quad{\rm and}\\
+# z_m & = \cos\left(\frac{2m+1}{2M}\pi\right) = \sin\left(\frac{M-2m-1}{2M}\pi\right),\quad {\rm for} \quad 0 \le m < M,\\
+# y_{n,m} & = \sqrt{1-x_n^2}z_m.
+# \end{aligned}
+# ```
+# Slightly more accuracy can be expected by using an auxiliary array:
+# ```math
+#   w_n = \sin\left(\frac{2n+1}{2N}\pi\right),\quad {\rm for} \quad 0 \le n < N,
+# ```
+# so that $y_{n,m} = w_nz_m$.
+#
+# The $x$ grid, together with the auxiliary array $w$:
+w = [sinpi((n+0.5)/N) for n in 0:N-1]
+x = [sinpi((N-2n-1)/(2N)) for n in 0:N-1]
+
+# The z grid
+z = [sinpi((M-2m-1)/(2M)) for m in 0:M-1]
+
+# On the mapped tensor product grid, our function samples are:
+F = [f(x[n], w[n]*z) for n in 1:N, z in z]
+
+# We precompute a Dunkl-Xu--Chebyshev plan:
+P = plan_rectdisk2cheb(F, β)
+
+# And an FFTW Chebyshev² analysis plan on the rectangularized disk:
+PA = plan_rectdisk_analysis(F)
+
+# Its Dunkl-Xu coefficients are:
+U = P\(PA*F)
+
+# The Dunkl-Xu coefficients are useful for integration. The integral of $f(x,y)$
+# over the disk should be $\pi/2$ by harmonicity. The coefficient of $P_{0,0}$
+# multiplied by `√π` is:
+U[1, 1]*sqrt(π)
+
+# Using an orthonormal basis, the integral of $[f(x,y)]^2$ over the disk is
+# approximately the square of the 2-norm of the coefficients:
+norm(U)^2, π/(2*sqrt(2))*log1p(sqrt(2))
diff --git a/examples/sphericalisometries.jl b/examples/sphericalisometries.jl
new file mode 100644
index 00000000..b3ed7f6b
--- /dev/null
+++ b/examples/sphericalisometries.jl
@@ -0,0 +1,124 @@
+function threshold!(A::AbstractArray, ϵ)
+    for i in eachindex(A)
+        if abs(A[i]) < ϵ A[i] = 0 end
+    end
+    A
+end
+
+using FastTransforms, LinearAlgebra, Random, Test
+
+# The colatitudinal grid (mod π):
+N = 10
+θ = (0.5:N-0.5)/N
+
+# The longitudinal grid (mod π):
+M = 2*N-1
+φ = (0:M-1)*2/M
+
+x = [cospi(φ)*sinpi(θ) for θ in θ, φ in φ]
+y = [sinpi(φ)*sinpi(θ) for θ in θ, φ in φ]
+z = [cospi(θ) for θ in θ, φ in φ]
+
+P = plan_sph2fourier(Float64, N)
+PA = plan_sph_analysis(Float64, N, M)
+J = FastTransforms.plan_sph_isometry(Float64, N)
+
+
+f = (x, y, z) -> x^2+y^4+x^2*y*z^3-x*y*z^2
+
+
+F = f.(x, y, z)
+V = PA*F
+U = threshold!(P\V, 100eps())
+FastTransforms.execute_sph_yz_axis_exchange!(J, U)
+FR = f.(x, -z, -y)
+VR = PA*FR
+UR = threshold!(P\VR, 100eps())
+@test U ≈ UR
+norm(U-UR)
+
+
+α, β, γ = 0.123, 0.456, 0.789
+
+# Isometry built up from ZYZR
+A = [cos(α) -sin(α) 0; sin(α) cos(α) 0; 0 0 1]
+B = [cos(β) 0 -sin(β); 0 1 0; sin(β) 0 cos(β)]
+C = [cos(γ) -sin(γ) 0; sin(γ) cos(γ) 0; 0 0 1]
+R = diagm([1, 1, 1.0])
+Q = A*B*C*R
+
+# Transform the sampling grid. Note that `Q` is transposed here.
+u = Q[1,1]*x + Q[2,1]*y + Q[3,1]*z
+v = Q[1,2]*x + Q[2,2]*y + Q[3,2]*z
+w = Q[1,3]*x + Q[2,3]*y + Q[3,3]*z
+
+F = f.(x, y, z)
+V = PA*F
+U = threshold!(P\V, 100eps())
+FastTransforms.execute_sph_rotation!(J, α, β, γ, U)
+FR = f.(u, v, w)
+VR = PA*FR
+UR = threshold!(P\VR, 100eps())
+@test U ≈ UR
+norm(U-UR)
+
+
+F = f.(x, y, z)
+V = PA*F
+U = threshold!(P\V, 100eps())
+FastTransforms.execute_sph_polar_reflection!(U)
+FR = f.(x, y, -z)
+VR = PA*FR
+UR = threshold!(P\VR, 100eps())
+@test U ≈ UR
+norm(U-UR)
+
+
+# Isometry built up from planar reflection
+W = [0.123, 0.456, 0.789]
+H = w -> I - 2/(w'w)*w*w'
+Q = H(W)
+
+# Transform the sampling grid. Note that `Q` is transposed here.
+u = Q[1,1]*x + Q[2,1]*y + Q[3,1]*z
+v = Q[1,2]*x + Q[2,2]*y + Q[3,2]*z
+w = Q[1,3]*x + Q[2,3]*y + Q[3,3]*z
+
+F = f.(x, y, z)
+V = PA*F
+U = threshold!(P\V, 100eps())
+FastTransforms.execute_sph_reflection!(J, W, U)
+FR = f.(u, v, w)
+VR = PA*FR
+UR = threshold!(P\VR, 100eps())
+@test U ≈ UR
+norm(U-UR)
+
+F = f.(x, y, z)
+V = PA*F
+U = threshold!(P\V, 100eps())
+FastTransforms.execute_sph_reflection!(J, (W[1], W[2], W[3]), U)
+FR = f.(u, v, w)
+VR = PA*FR
+UR = threshold!(P\VR, 100eps())
+@test U ≈ UR
+norm(U-UR)
+
+# Random orthogonal transformation
+Random.seed!(0)
+Q = qr(rand(3, 3)).Q
+
+# Transform the sampling grid. Note that `Q` is transposed here.
+u = Q[1,1]*x + Q[2,1]*y + Q[3,1]*z
+v = Q[1,2]*x + Q[2,2]*y + Q[3,2]*z
+w = Q[1,3]*x + Q[2,3]*y + Q[3,3]*z
+
+F = f.(x, y, z)
+V = PA*F
+U = threshold!(P\V, 100eps())
+FastTransforms.execute_sph_orthogonal_transformation!(J, Q, U)
+FR = f.(u, v, w)
+VR = PA*FR
+UR = threshold!(P\VR, 100eps())
+@test U ≈ UR
+norm(U-UR)
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 769c7d8a..c0817a6a 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -9,7 +9,7 @@ import DSP
 @reexport using FFTW
 
 import Base: unsafe_convert, eltype, ndims, adjoint, transpose, show, *, \,
-             inv, length, size, view, getindex
+             inv, length, size, view, getindex, convert
 
 import Base.GMP: Limb
 
@@ -35,8 +35,8 @@ import LinearAlgebra: mul!, lmul!, ldiv!
 export leg2cheb, cheb2leg, ultra2ultra, jac2jac,
        lag2lag, jac2ultra, ultra2jac, jac2cheb,
        cheb2jac, ultra2cheb, cheb2ultra,
-       sph2fourier, sphv2fourier, disk2cxf, tri2cheb, tet2cheb,
-       fourier2sph, fourier2sphv, cxf2disk, cheb2tri, cheb2tet
+       sph2fourier, sphv2fourier, disk2cxf, rectdisk2cheb, tri2cheb, tet2cheb,
+       fourier2sph, fourier2sphv, cxf2disk, cheb2rectdisk, cheb2tri, cheb2tet
 
 export plan_leg2cheb, plan_cheb2leg, plan_ultra2ultra, plan_jac2jac,
        plan_lag2lag, plan_jac2ultra, plan_ultra2jac, plan_jac2cheb,
@@ -44,6 +44,7 @@ export plan_leg2cheb, plan_cheb2leg, plan_ultra2ultra, plan_jac2jac,
        plan_sph2fourier, plan_sph_synthesis, plan_sph_analysis,
        plan_sphv2fourier, plan_sphv_synthesis, plan_sphv_analysis,
        plan_disk2cxf, plan_disk_synthesis, plan_disk_analysis,
+       plan_rectdisk2cheb, plan_rectdisk_synthesis, plan_rectdisk_analysis,
        plan_tri2cheb, plan_tri_synthesis, plan_tri_analysis,
        plan_tet2cheb, plan_tet_synthesis, plan_tet_analysis,
        plan_spinsph2fourier, plan_spinsph_synthesis, plan_spinsph_analysis
@@ -91,6 +92,7 @@ include("gaunt.jl")
 export sphones, sphzeros, sphrand, sphrandn, sphevaluate,
        sphvones, sphvzeros, sphvrand, sphvrandn,
        diskones, diskzeros, diskrand, diskrandn,
+       rectdiskones, rectdiskzeros, rectdiskrand, rectdiskrandn,
        triones, trizeros, trirand, trirandn, trievaluate,
        tetones, tetzeros, tetrand, tetrandn,
        spinsphones, spinsphzeros, spinsphrand, spinsphrandn
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 23c36fef..c2cb8e65 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -119,21 +119,25 @@ const CHEB2ULTRA           = 10
 const SPHERE               = 11
 const SPHEREV              = 12
 const DISK                 = 13
-const TRIANGLE             = 14
-const TETRAHEDRON          = 15
-const SPINSPHERE           = 16
-const SPHERESYNTHESIS      = 17
-const SPHEREANALYSIS       = 18
-const SPHEREVSYNTHESIS     = 19
-const SPHEREVANALYSIS      = 20
-const DISKSYNTHESIS        = 21
-const DISKANALYSIS         = 22
-const TRIANGLESYNTHESIS    = 23
-const TRIANGLEANALYSIS     = 24
-const TETRAHEDRONSYNTHESIS = 25
-const TETRAHEDRONANALYSIS  = 26
-const SPINSPHERESYNTHESIS  = 27
-const SPINSPHEREANALYSIS   = 28
+const RECTDISK             = 14
+const TRIANGLE             = 15
+const TETRAHEDRON          = 16
+const SPINSPHERE           = 17
+const SPHERESYNTHESIS      = 18
+const SPHEREANALYSIS       = 19
+const SPHEREVSYNTHESIS     = 20
+const SPHEREVANALYSIS      = 21
+const DISKSYNTHESIS        = 22
+const DISKANALYSIS         = 23
+const RECTDISKSYNTHESIS    = 24
+const RECTDISKANALYSIS     = 25
+const TRIANGLESYNTHESIS    = 26
+const TRIANGLEANALYSIS     = 27
+const TETRAHEDRONSYNTHESIS = 28
+const TETRAHEDRONANALYSIS  = 29
+const SPINSPHERESYNTHESIS  = 30
+const SPINSPHEREANALYSIS   = 31
+const SPHERICALISOMETRY    = 32
 
 
 let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
@@ -150,6 +154,7 @@ let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                SPHERE               => "Spherical harmonic--Fourier",
                SPHEREV              => "Spherical vector field--Fourier",
                DISK                 => "Zernike--Chebyshev×Fourier",
+               RECTDISK             => "Dunkl-Xu--Chebyshev²",
                TRIANGLE             => "Proriol--Chebyshev²",
                TETRAHEDRON          => "Proriol--Chebyshev³",
                SPINSPHERE           => "Spin-weighted spherical harmonic--Fourier",
@@ -159,12 +164,15 @@ let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                SPHEREVANALYSIS      => "FFTW Fourier analysis on the sphere (vector field)",
                DISKSYNTHESIS        => "FFTW Chebyshev×Fourier synthesis on the disk",
                DISKANALYSIS         => "FFTW Chebyshev×Fourier analysis on the disk",
+               RECTDISKSYNTHESIS    => "FFTW Chebyshev synthesis on the rectangularized disk",
+               RECTDISKANALYSIS     => "FFTW Chebyshev analysis on the rectangularized disk",
                TRIANGLESYNTHESIS    => "FFTW Chebyshev synthesis on the triangle",
                TRIANGLEANALYSIS     => "FFTW Chebyshev analysis on the triangle",
                TETRAHEDRONSYNTHESIS => "FFTW Chebyshev synthesis on the tetrahedron",
                TETRAHEDRONANALYSIS  => "FFTW Chebyshev analysis on the tetrahedron",
                SPINSPHERESYNTHESIS  => "FFTW Fourier synthesis on the sphere (spin-weighted)",
-               SPINSPHEREANALYSIS   => "FFTW Fourier analysis on the sphere (spin-weighted)")
+               SPINSPHEREANALYSIS   => "FFTW Fourier analysis on the sphere (spin-weighted)",
+               SPHERICALISOMETRY    => "Spherical isometry")
     global kind2string
     kind2string(k::Integer) = k2s[Int(k)]
 end
@@ -199,11 +207,13 @@ show(io::IO, p::FTPlan{T, 1, K}) where {T, K} = print(io, "FastTransforms ", kin
 show(io::IO, p::FTPlan{T, 2, SPHERE}) where T = print(io, "FastTransforms ", kind2string(SPHERE), " plan for $(p.n)×$(2p.n-1)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, SPHEREV}) where T = print(io, "FastTransforms ", kind2string(SPHEREV), " plan for $(p.n)×$(2p.n-1)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, DISK}) where T = print(io, "FastTransforms ", kind2string(DISK), " plan for $(p.n)×$(4p.n-3)-element array of ", T)
+show(io::IO, p::FTPlan{T, 2, RECTDISK}) where T = print(io, "FastTransforms ", kind2string(RECTDISK), " plan for $(p.n)×$(p.n)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, TRIANGLE}) where T = print(io, "FastTransforms ", kind2string(TRIANGLE), " plan for $(p.n)×$(p.n)-element array of ", T)
 show(io::IO, p::FTPlan{T, 3, TETRAHEDRON}) where T = print(io, "FastTransforms ", kind2string(TETRAHEDRON), " plan for $(p.n)×$(p.n)×$(p.n)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, SPINSPHERE}) where T = print(io, "FastTransforms ", kind2string(SPINSPHERE), " plan for $(p.n)×$(2p.n-1)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, K}) where {T, K} = print(io, "FastTransforms plan for ", kind2string(K), " for $(p.n)×$(p.m)-element array of ", T)
 show(io::IO, p::FTPlan{T, 3, K}) where {T, K} = print(io, "FastTransforms plan for ", kind2string(K), " for $(p.n)×$(p.l)×$(p.m)-element array of ", T)
+show(io::IO, p::FTPlan{T, 2, SPHERICALISOMETRY}) where T = print(io, "FastTransforms ", kind2string(SPHERICALISOMETRY), " plan for $(p.n)×$(2p.n-1)-element array of ", T)
 
 function checksize(p::FTPlan{T}, x::Array{T}) where T
     if p.n != size(x, 1)
@@ -222,6 +232,12 @@ for K in (SPHERE, SPHEREV, DISK, SPINSPHERE)
     end
 end
 
+function checksize(p::FTPlan{T, 2, SPHERICALISOMETRY}, x::Matrix{T}) where T
+    if p.n != size(x, 1) || 2p.n-1 != size(x, 2)
+        throw(DimensionMismatch("This FTPlan must operate on arrays of size $(p.n) × $(2p.n-1)."))
+    end
+end
+
 unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::FTPlan) = p.plan
 unsafe_convert(::Type{Ptr{mpfr_t}}, p::FTPlan) = unsafe_convert(Ptr{mpfr_t}, p.plan)
 
@@ -237,12 +253,15 @@ destroy_plan(p::FTPlan{Float64, 2, SPHEREVSYNTHESIS}) = ccall((:ft_destroy_spher
 destroy_plan(p::FTPlan{Float64, 2, SPHEREVANALYSIS}) = ccall((:ft_destroy_sphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, DISKSYNTHESIS}) = ccall((:ft_destroy_disk_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, DISKANALYSIS}) = ccall((:ft_destroy_disk_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float64, 2, RECTDISKSYNTHESIS}) = ccall((:ft_destroy_rectdisk_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float64, 2, RECTDISKANALYSIS}) = ccall((:ft_destroy_rectdisk_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, TRIANGLESYNTHESIS}) = ccall((:ft_destroy_triangle_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, TRIANGLEANALYSIS}) = ccall((:ft_destroy_triangle_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 3, TETRAHEDRONSYNTHESIS}) = ccall((:ft_destroy_tetrahedron_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 3, TETRAHEDRONANALYSIS}) = ccall((:ft_destroy_tetrahedron_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHERESYNTHESIS}) = ccall((:ft_destroy_spinsphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHEREANALYSIS}) = ccall((:ft_destroy_spinsphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float64, 2, SPHERICALISOMETRY}) = ccall((:ft_destroy_sph_isometry_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 
 struct AdjointFTPlan{T, S}
     parent::S
@@ -289,7 +308,8 @@ unsafe_convert(::Type{Ptr{mpfr_t}}, p::TransposeFTPlan{T, FTPlan{T, N, K}}) wher
 for f in (:leg2cheb, :cheb2leg, :ultra2ultra, :jac2jac,
           :lag2lag, :jac2ultra, :ultra2jac, :jac2cheb,
           :cheb2jac, :ultra2cheb, :cheb2ultra,
-          :sph2fourier, :sphv2fourier, :disk2cxf, :tri2cheb, :tet2cheb)
+          :sph2fourier, :sphv2fourier, :disk2cxf,
+          :rectdisk2cheb, :tri2cheb, :tet2cheb)
     plan_f = Symbol("plan_", f)
     @eval begin
         $plan_f(x::AbstractArray{T}, y...; z...) where T = $plan_f(T, size(x, 1), y...; z...)
@@ -299,8 +319,8 @@ for f in (:leg2cheb, :cheb2leg, :ultra2ultra, :jac2jac,
 end
 
 for (f, plan_f) in ((:fourier2sph, :plan_sph2fourier), (:fourier2sphv, :plan_sphv2fourier),
-                    (:cxf2disk2, :plan_disk2cxf), (:cheb2tri, :plan_tri2cheb),
-                    (:cheb2tet, :plan_tet2cheb))
+                    (:cxf2disk, :plan_disk2cxf), (:cheb2rectdisk, :plan_rectdisk2cheb),
+                    (:cheb2tri, :plan_tri2cheb), (:cheb2tet, :plan_tet2cheb))
     @eval begin
         $f(x::AbstractArray, y...; z...) = $plan_f(x, y...; z...)\x
     end
@@ -488,11 +508,16 @@ function plan_sphv2fourier(::Type{Float64}, n::Integer)
     return FTPlan{Float64, 2, SPHEREV}(plan, n)
 end
 
-function plan_disk2cxf(::Type{Float64}, n::Integer)
-    plan = ccall((:ft_plan_disk2cxf, libfasttransforms), Ptr{ft_plan_struct}, (Cint, ), n)
+function plan_disk2cxf(::Type{Float64}, n::Integer, α, β)
+    plan = ccall((:ft_plan_disk2cxf, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Float64), n, α, β)
     return FTPlan{Float64, 2, DISK}(plan, n)
 end
 
+function plan_rectdisk2cheb(::Type{Float64}, n::Integer, β)
+    plan = ccall((:ft_plan_rectdisk2cheb, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64), n, β)
+    return FTPlan{Float64, 2, RECTDISK}(plan, n)
+end
+
 function plan_tri2cheb(::Type{Float64}, n::Integer, α, β, γ)
     plan = ccall((:ft_plan_tri2cheb, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Float64, Float64), n, α, β, γ)
     return FTPlan{Float64, 2, TRIANGLE}(plan, n)
@@ -514,6 +539,8 @@ for (fJ, fC, fE, K) in ((:plan_sph_synthesis, :ft_plan_sph_synthesis, :ft_execut
                     (:plan_sphv_analysis, :ft_plan_sphv_analysis, :ft_execute_sphv_analysis, SPHEREVANALYSIS),
                     (:plan_disk_synthesis, :ft_plan_disk_synthesis, :ft_execute_disk_synthesis, DISKSYNTHESIS),
                     (:plan_disk_analysis, :ft_plan_disk_analysis, :ft_execute_disk_analysis, DISKANALYSIS),
+                    (:plan_rectdisk_synthesis, :ft_plan_rectdisk_synthesis, :ft_execute_rectdisk_synthesis, RECTDISKSYNTHESIS),
+                    (:plan_rectdisk_analysis, :ft_plan_rectdisk_analysis, :ft_execute_rectdisk_analysis, RECTDISKANALYSIS),
                     (:plan_tri_synthesis, :ft_plan_tri_synthesis, :ft_execute_tri_synthesis, TRIANGLESYNTHESIS),
                     (:plan_tri_analysis, :ft_plan_tri_analysis, :ft_execute_tri_analysis, TRIANGLEANALYSIS))
     @eval begin
@@ -595,6 +622,11 @@ function lmul!(p::FTPlan{Complex{Float64}, 2, SPINSPHEREANALYSIS}, x::Matrix{Com
     return x
 end
 
+function plan_sph_isometry(::Type{Float64}, n::Integer)
+    plan = ccall((:ft_plan_sph_isometry, libfasttransforms), Ptr{ft_plan_struct}, (Cint, ), n)
+    return FTPlan{Float64, 2, SPHERICALISOMETRY}(plan, n)
+end
+
 *(p::FTPlan{T}, x::AbstractArray{T}) where T = lmul!(p, Array(x))
 *(p::AdjointFTPlan{T}, x::AbstractArray{T}) where T = lmul!(p, Array(x))
 *(p::TransposeFTPlan{T}, x::AbstractArray{T}) where T = lmul!(p, Array(x))
@@ -703,6 +735,8 @@ for (fJ, fC, K) in ((:lmul!, :ft_execute_sph2fourier, SPHERE),
                     (:ldiv!, :ft_execute_fourier2sphv, SPHEREV),
                     (:lmul!, :ft_execute_disk2cxf, DISK),
                     (:ldiv!, :ft_execute_cxf2disk, DISK),
+                    (:lmul!, :ft_execute_rectdisk2cheb, RECTDISK),
+                    (:ldiv!, :ft_execute_cheb2rectdisk, RECTDISK),
                     (:lmul!, :ft_execute_tri2cheb, TRIANGLE),
                     (:ldiv!, :ft_execute_cheb2tri, TRIANGLE))
     @eval begin
@@ -738,6 +772,61 @@ function ldiv!(p::FTPlan{Complex{Float64}, 2, SPINSPHERE}, x::Matrix{Complex{Flo
     return x
 end
 
+function execute_sph_polar_rotation!(x::Matrix{Float64}, α)
+    ccall((:ft_execute_sph_polar_rotation, libfasttransforms), Cvoid, (Ptr{Float64}, Cint, Cint, Float64, Float64), x, size(x, 1), size(x, 2), sin(α), cos(α))
+    return x
+end
+
+function execute_sph_polar_reflection!(x::Matrix{Float64})
+    ccall((:ft_execute_sph_polar_reflection, libfasttransforms), Cvoid, (Ptr{Float64}, Cint, Cint), x, size(x, 1), size(x, 2))
+    return x
+end
+
+struct ft_orthogonal_transformation
+    Q::NTuple{9, Float64}
+end
+
+function convert(::Type{ft_orthogonal_transformation}, Q::AbstractMatrix)
+    @assert size(Q, 1) ≥ 3 && size(Q, 2) ≥ 3 # Q may be larger than 3×3; only its leading 3×3 block is read below
+    return ft_orthogonal_transformation((Q[1, 1], Q[2, 1], Q[3, 1], Q[1, 2], Q[2, 2], Q[3, 2], Q[1, 3], Q[2, 3], Q[3, 3])) # entries are packed column by column into the 9-tuple
+end
+convert(::Type{ft_orthogonal_transformation}, Q::NTuple{9, Float64}) = ft_orthogonal_transformation(Q)
+
+function execute_sph_orthogonal_transformation!(p::FTPlan{Float64, 2, SPHERICALISOMETRY}, Q, x::Matrix{Float64})
+    checksize(p, x)
+    ccall((:ft_execute_sph_orthogonal_transformation, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ft_orthogonal_transformation, Ptr{Float64}, Cint, Cint), p, Q, x, size(x, 1), size(x, 2))
+    return x
+end
+
+function execute_sph_yz_axis_exchange!(p::FTPlan{Float64, 2, SPHERICALISOMETRY}, x::Matrix{Float64})
+    checksize(p, x)
+    ccall((:ft_execute_sph_yz_axis_exchange, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), p, x, size(x, 1), size(x, 2))
+    return x
+end
+
+function execute_sph_rotation!(p::FTPlan{Float64, 2, SPHERICALISOMETRY}, α, β, γ, x::Matrix{Float64})
+    checksize(p, x)
+    ccall((:ft_execute_sph_rotation, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Float64, Float64, Float64, Ptr{Float64}, Cint, Cint), p, α, β, γ, x, size(x, 1), size(x, 2))
+    return x
+end
+
+struct ft_reflection
+    w::NTuple{3, Float64}
+end
+
+function convert(::Type{ft_reflection}, w::AbstractVector)
+    @assert length(w) ≥ 3 # longer vectors are accepted; entries past the third are ignored
+    return ft_reflection((w[1], w[2], w[3]))
+end
+convert(::Type{ft_reflection}, w::NTuple{3, Float64}) = ft_reflection(w)
+
+function execute_sph_reflection!(p::FTPlan{Float64, 2, SPHERICALISOMETRY}, w, x::Matrix{Float64})
+    checksize(p, x)
+    ccall((:ft_execute_sph_reflection, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ft_reflection, Ptr{Float64}, Cint, Cint), p, w, x, size(x, 1), size(x, 2))
+    return x
+end
+execute_sph_reflection!(p::FTPlan{Float64, 2, SPHERICALISOMETRY}, w1, w2, w3, x::Matrix{Float64}) = execute_sph_reflection!(p, ft_reflection((w1, w2, w3)), x) # ft_reflection has a single 3-tuple field, so the three components must be passed as one tuple
+
 *(p::FTPlan{T}, x::Array{Complex{T}}) where T = lmul!(p, deepcopy(x))
 *(p::AdjointFTPlan{T}, x::Array{Complex{T}}) where T = lmul!(p, deepcopy(x))
 *(p::TransposeFTPlan{T}, x::Array{Complex{T}}) where T = lmul!(p, deepcopy(x))
diff --git a/src/specialfunctions.jl b/src/specialfunctions.jl
index 6ac37b73..f50637a1 100644
--- a/src/specialfunctions.jl
+++ b/src/specialfunctions.jl
@@ -551,6 +551,11 @@ end
 
 trizeros(::Type{T}, m::Int, n::Int) where T = zeros(T, m, n)
 
+const rectdiskrand = trirand
+const rectdiskrandn = trirandn
+const rectdiskones = triones
+const rectdiskzeros = trizeros
+
 """
 Pointwise evaluation of triangular harmonic:
 
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 0affa021..a8f7be77 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -105,16 +105,27 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
     test_nd_plans(p, ps, pa, A)
 
     A = diskones(Float64, n, 4n-3)
-    p = plan_disk2cxf(A)
+    p = plan_disk2cxf(A, α, β)
     ps = plan_disk_synthesis(A)
     pa = plan_disk_analysis(A)
     test_nd_plans(p, ps, pa, A)
     A = diskones(Float64, n, 4n-3) + im*diskones(Float64, n, 4n-3)
-    p = plan_disk2cxf(A)
+    p = plan_disk2cxf(A, α, β)
     ps = plan_disk_synthesis(A)
     pa = plan_disk_analysis(A)
     test_nd_plans(p, ps, pa, A)
 
+    A = rectdiskones(Float64, n, n)
+    p = plan_rectdisk2cheb(A, β)
+    ps = plan_rectdisk_synthesis(A)
+    pa = plan_rectdisk_analysis(A)
+    test_nd_plans(p, ps, pa, A)
+    A = rectdiskones(Float64, n, n) + im*rectdiskones(Float64, n, n)
+    p = plan_rectdisk2cheb(A, β)
+    ps = plan_rectdisk_synthesis(A)
+    pa = plan_rectdisk_analysis(A)
+    test_nd_plans(p, ps, pa, A)
+
     A = triones(Float64, n, n)
     p = plan_tri2cheb(A, α, β, γ)
     ps = plan_tri_synthesis(A)

From 340e72ad6f4442677156312c5acab62e7094c681 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 25 Nov 2020 15:32:05 -0600
Subject: [PATCH 040/222] trial support of plotting in examples in docs

---
 docs/Project.toml |  4 ++++
 docs/make.jl      |  2 +-
 examples/disk.jl  | 50 +++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 110e6bb0..88d31322 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,7 +1,11 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FastTransforms = "057dd010-8810-581a-b7be-e3fc3b93f78c"
+GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
 Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
+PlotlyJS = "f0f68f2c-4968-5e81-91da-67840de0976a"
+Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee"
 
 [compat]
 Documenter = "~0.24"
diff --git a/docs/make.jl b/docs/make.jl
index c896ba94..f9a2a887 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -20,7 +20,7 @@ end
 
 makedocs(
             doctest = false,
-            format = Documenter.HTML(),
+            format = Documenter.HTML(assets = [asset("https://cdn.plot.ly/plotly-1.54.7.js")]),
             sitename = "FastTransforms.jl",
             authors = "Richard Mikael Slevinsky",
             pages = Any[
diff --git a/examples/disk.jl b/examples/disk.jl
index e4ce0233..4546abad 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -19,13 +19,13 @@
 # For the storage pattern of the arrays, please consult the
 # [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
 
-using FastTransforms, LinearAlgebra
+using FastTransforms, LinearAlgebra, Plots
 
 # Our function $f$ on the disk:
 f = (x,y) -> (x^2-y^2+1)/((x^2-y^2+1)^2+(2x*y+1)^2)
 
 # The Zernike polynomial degree:
-N = 5
+N = 20
 M = 4N-3
 
 # The radial grid:
@@ -37,6 +37,29 @@ r = [sinpi((N-n-0.5)/(2N)) for n in 0:N-1]
 # On the mapped tensor product grid, our function samples are:
 F = [f(r*cospi(θ), r*sinpi(θ)) for r in r, θ in θ]
 
+# We superpose the grid on top of a contour plot of $f$:
+X = [r*cospi(θ) for r in r, θ in θ]
+Y = [r*sinpi(θ) for r in r, θ in θ]
+
+# PyPlot
+pyplot()
+contourf(X, Y, F; levels=30, linewidth=2, xlabel="x", ylabel="y")
+scatter!(X, Y; legend=false, markercolor=:red, size=(600, 600))
+
+# bis
+scatter3d(vec(X), vec(Y), vec(0X); markersize=2.0, markercolor=:red, size=(600, 600))
+surface!(X, Y, F; legend=false, xlabel="x", ylabel="y")
+
+# PlotlyJS
+plotlyjs()
+scatter3d(vec(X), vec(Y), vec(0X); markersize=0.5, markercolor=:red, size=(600, 600))
+surface!(X, Y, F; legend=false, xlabel="x", ylabel="y")
+
+# GR
+gr()
+scatter3d(vec(X), vec(Y), vec(0X); markersize=0.5, markercolor=:red, size=(600, 600))
+surface!(vec(X), vec(Y), vec(F); legend=false, xlabel="x", ylabel="y")
+
 # We precompute a (generalized) Zernike--Chebyshev×Fourier plan:
 α, β = 0, 0
 P = plan_disk2cxf(F, α, β)
@@ -85,6 +108,29 @@ z = [sinpi((M-2m-1)/(2M)) for m in 0:M-1]
 # On the mapped tensor product grid, our function samples are:
 F = [f(x[n], w[n]*z) for n in 1:N, z in z]
 
+# We superpose the grid on top of a contour plot of $f$:
+X = [x for x in x, z in z]
+Y = [w*z for w in w, z in z]
+
+# PyPlot
+pyplot()
+contourf(X, Y, F; levels=30, linewidth=2, xlabel="x", ylabel="y")
+scatter!(X, Y; legend=false, markercolor=:green, size=(600, 600))
+
+# bis
+scatter3d(vec(X), vec(Y), vec(0X); markersize=2.0, markercolor=:green, size=(600, 600))
+surface!(X, Y, F; legend=false, xlabel="x", ylabel="y")
+
+# PlotlyJS
+plotlyjs()
+scatter3d(vec(X), vec(Y), vec(0X); markersize=0.5, markercolor=:green, size=(600, 600))
+surface!(X, Y, F; legend=false, xlabel="x", ylabel="y")
+
+# GR
+gr()
+scatter3d(vec(X), vec(Y), vec(0X); markersize=0.5, markercolor=:green, size=(600, 600))
+surface!(vec(X), vec(Y), vec(F); legend=false, xlabel="x", ylabel="y")
+
 # We precompute a Dunkl-Xu--Chebyshev plan:
 P = plan_rectdisk2cheb(F, β)
 

From 5def66de2cee414124dad3f4cf969fa85d0f4032 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 25 Nov 2020 15:45:06 -0600
Subject: [PATCH 041/222] PyPlot.jl erred in docs

---
 docs/make.jl | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/docs/make.jl b/docs/make.jl
index f9a2a887..6e4267ca 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,4 +1,13 @@
-using Documenter, FastTransforms, Literate
+using Documenter, FastTransforms, Literate, Plots
+
+# Set matplotlib gui backend
+ENV["MPLBACKEND"] = "agg"
+ENV["PYTHON"] = ""
+
+# Initialize backends
+pyplot()
+plotlyjs()
+gr()
 
 const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
 const OUTPUT_DIR   = joinpath(@__DIR__, "src/generated")

From 1c71fc0ee6c44ffad77af0f8fc26d924cb47322e Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 25 Nov 2020 15:52:25 -0600
Subject: [PATCH 042/222] remove PyPlot.jl

---
 docs/Project.toml |  1 -
 docs/make.jl      |  6 ------
 examples/disk.jl  | 18 ------------------
 3 files changed, 25 deletions(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 88d31322..9748dd88 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -5,7 +5,6 @@ GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
 Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
 PlotlyJS = "f0f68f2c-4968-5e81-91da-67840de0976a"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
-PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee"
 
 [compat]
 Documenter = "~0.24"
diff --git a/docs/make.jl b/docs/make.jl
index 6e4267ca..f69a6be5 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,11 +1,5 @@
 using Documenter, FastTransforms, Literate, Plots
 
-# Set matplotlib gui backend
-ENV["MPLBACKEND"] = "agg"
-ENV["PYTHON"] = ""
-
-# Initialize backends
-pyplot()
 plotlyjs()
 gr()
 
diff --git a/examples/disk.jl b/examples/disk.jl
index 4546abad..59916416 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -41,15 +41,6 @@ F = [f(r*cospi(θ), r*sinpi(θ)) for r in r, θ in θ]
 X = [r*cospi(θ) for r in r, θ in θ]
 Y = [r*sinpi(θ) for r in r, θ in θ]
 
-# PyPlot
-pyplot()
-contourf(X, Y, F; levels=30, linewidth=2, xlabel="x", ylabel="y")
-scatter!(X, Y; legend=false, markercolor=:red, size=(600, 600))
-
-# bis
-scatter3d(vec(X), vec(Y), vec(0X); markersize=2.0, markercolor=:red, size=(600, 600))
-surface!(X, Y, F; legend=false, xlabel="x", ylabel="y")
-
 # PlotlyJS
 plotlyjs()
 scatter3d(vec(X), vec(Y), vec(0X); markersize=0.5, markercolor=:red, size=(600, 600))
@@ -112,15 +103,6 @@ F = [f(x[n], w[n]*z) for n in 1:N, z in z]
 X = [x for x in x, z in z]
 Y = [w*z for w in w, z in z]
 
-# PyPlot
-pyplot()
-contourf(X, Y, F; levels=30, linewidth=2, xlabel="x", ylabel="y")
-scatter!(X, Y; legend=false, markercolor=:green, size=(600, 600))
-
-# bis
-scatter3d(vec(X), vec(Y), vec(0X); markersize=2.0, markercolor=:green, size=(600, 600))
-surface!(X, Y, F; legend=false, xlabel="x", ylabel="y")
-
 # PlotlyJS
 plotlyjs()
 scatter3d(vec(X), vec(Y), vec(0X); markersize=0.5, markercolor=:green, size=(600, 600))

From 7a897903be1380a3f2f3fd5336824c471901e45b Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 25 Nov 2020 16:12:26 -0600
Subject: [PATCH 043/222] plotlyjs wins

---
 docs/Project.toml |  1 -
 docs/make.jl      |  1 -
 examples/disk.jl  | 25 +++++--------------------
 3 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 9748dd88..313c185c 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,7 +1,6 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FastTransforms = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
 Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
 PlotlyJS = "f0f68f2c-4968-5e81-91da-67840de0976a"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
diff --git a/docs/make.jl b/docs/make.jl
index f69a6be5..0a1335dd 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,7 +1,6 @@
 using Documenter, FastTransforms, Literate, Plots
 
 plotlyjs()
-gr()
 
 const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
 const OUTPUT_DIR   = joinpath(@__DIR__, "src/generated")
diff --git a/examples/disk.jl b/examples/disk.jl
index 59916416..c6b16f5d 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -20,6 +20,7 @@
 # [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
 
 using FastTransforms, LinearAlgebra, Plots
+plotlyjs()
 
 # Our function $f$ on the disk:
 f = (x,y) -> (x^2-y^2+1)/((x^2-y^2+1)^2+(2x*y+1)^2)
@@ -37,20 +38,12 @@ r = [sinpi((N-n-0.5)/(2N)) for n in 0:N-1]
 # On the mapped tensor product grid, our function samples are:
 F = [f(r*cospi(θ), r*sinpi(θ)) for r in r, θ in θ]
 
-# We superpose the grid on top of a contour plot of $f$:
+# We superpose a surface plot of $f$ on top of the grid:
 X = [r*cospi(θ) for r in r, θ in θ]
 Y = [r*sinpi(θ) for r in r, θ in θ]
-
-# PlotlyJS
-plotlyjs()
-scatter3d(vec(X), vec(Y), vec(0X); markersize=0.5, markercolor=:red, size=(600, 600))
+scatter3d(vec(X), vec(Y), vec(0X); markercolor=:red, size=(1000, 1000))
 surface!(X, Y, F; legend=false, xlabel="x", ylabel="y")
 
-# GR
-gr()
-scatter3d(vec(X), vec(Y), vec(0X); markersize=0.5, markercolor=:red, size=(600, 600))
-surface!(vec(X), vec(Y), vec(F); legend=false, xlabel="x", ylabel="y")
-
 # We precompute a (generalized) Zernike--Chebyshev×Fourier plan:
 α, β = 0, 0
 P = plan_disk2cxf(F, α, β)
@@ -99,20 +92,12 @@ z = [sinpi((M-2m-1)/(2M)) for m in 0:M-1]
 # On the mapped tensor product grid, our function samples are:
 F = [f(x[n], w[n]*z) for n in 1:N, z in z]
 
-# We superpose the grid on top of a contour plot of $f$:
+# We superpose a surface plot of $f$ on top of the grid:
 X = [x for x in x, z in z]
 Y = [w*z for w in w, z in z]
-
-# PlotlyJS
-plotlyjs()
-scatter3d(vec(X), vec(Y), vec(0X); markersize=0.5, markercolor=:green, size=(600, 600))
+scatter3d(vec(X), vec(Y), vec(0X); markercolor=:red, size=(1000, 1000))
 surface!(X, Y, F; legend=false, xlabel="x", ylabel="y")
 
-# GR
-gr()
-scatter3d(vec(X), vec(Y), vec(0X); markersize=0.5, markercolor=:green, size=(600, 600))
-surface!(vec(X), vec(Y), vec(F); legend=false, xlabel="x", ylabel="y")
-
 # We precompute a Dunkl-Xu--Chebyshev plan:
 P = plan_rectdisk2cheb(F, β)
 

From 04008f6e863c7879e03b2bf345f0b8ec4023c6c2 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 25 Nov 2020 17:14:57 -0600
Subject: [PATCH 044/222] better point sizes

---
 examples/disk.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/disk.jl b/examples/disk.jl
index c6b16f5d..d1a2dd79 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -41,8 +41,8 @@ F = [f(r*cospi(θ), r*sinpi(θ)) for r in r, θ in θ]
 # We superpose a surface plot of $f$ on top of the grid:
 X = [r*cospi(θ) for r in r, θ in θ]
 Y = [r*sinpi(θ) for r in r, θ in θ]
-scatter3d(vec(X), vec(Y), vec(0X); markercolor=:red, size=(1000, 1000))
-surface!(X, Y, F; legend=false, xlabel="x", ylabel="y")
+scatter3d(vec(X), vec(Y), vec(0X); markersize=0.75, markercolor=:red, size=(1000, 1000))
+surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
 
 # We precompute a (generalized) Zernike--Chebyshev×Fourier plan:
 α, β = 0, 0
@@ -95,8 +95,8 @@ F = [f(x[n], w[n]*z) for n in 1:N, z in z]
 # We superpose a surface plot of $f$ on top of the grid:
 X = [x for x in x, z in z]
 Y = [w*z for w in w, z in z]
-scatter3d(vec(X), vec(Y), vec(0X); markercolor=:red, size=(1000, 1000))
-surface!(X, Y, F; legend=false, xlabel="x", ylabel="y")
+scatter3d(vec(X), vec(Y), vec(0X); markersize=0.75, markercolor=:green, size=(1000, 1000))
+surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
 
 # We precompute a Dunkl-Xu--Chebyshev plan:
 P = plan_rectdisk2cheb(F, β)

From 9b1d77a8b4d5fdd9ce1151c83928b46d9b16a3c9 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 25 Nov 2020 21:59:49 -0600
Subject: [PATCH 045/222] reduce the grid size

---
 examples/disk.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/disk.jl b/examples/disk.jl
index d1a2dd79..e1a2f515 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -26,7 +26,7 @@ plotlyjs()
 f = (x,y) -> (x^2-y^2+1)/((x^2-y^2+1)^2+(2x*y+1)^2)
 
 # The Zernike polynomial degree:
-N = 20
+N = 15
 M = 4N-3
 
 # The radial grid:
@@ -41,7 +41,7 @@ F = [f(r*cospi(θ), r*sinpi(θ)) for r in r, θ in θ]
 # We superpose a surface plot of $f$ on top of the grid:
 X = [r*cospi(θ) for r in r, θ in θ]
 Y = [r*sinpi(θ) for r in r, θ in θ]
-scatter3d(vec(X), vec(Y), vec(0X); markersize=0.75, markercolor=:red, size=(1000, 1000))
+scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:red, size=(1000, 1000))
 surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
 
 # We precompute a (generalized) Zernike--Chebyshev×Fourier plan:
@@ -95,7 +95,7 @@ F = [f(x[n], w[n]*z) for n in 1:N, z in z]
 # We superpose a surface plot of $f$ on top of the grid:
 X = [x for x in x, z in z]
 Y = [w*z for w in w, z in z]
-scatter3d(vec(X), vec(Y), vec(0X); markersize=0.75, markercolor=:green, size=(1000, 1000))
+scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:green, size=(1000, 1000))
 surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
 
 # We precompute a Dunkl-Xu--Chebyshev plan:

From ecc0e592beabcdf32074468887c472c1d6263bb3 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 26 Nov 2020 14:17:37 -0600
Subject: [PATCH 046/222] use postprocessing to get HTML figures

---
 docs/make.jl     | 11 +++++++++--
 examples/disk.jl | 14 ++++++++++++--
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index 0a1335dd..f79256cf 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -15,14 +15,21 @@ examples = [
     "triangle.jl",
 ]
 
+function uncomment_objects(str) # postprocess hook passed to Literate.markdown: rewrites the `###`-prefixed marker lines used in the example scripts
+    str = replace(str, "###```@raw" => "```\n\n```@raw") # emit a closing fence, a blank line, then the opening of the @raw block
+    str = replace(str, "###<object" => "<object") # strip the `###` prefix from the <object> tag line
+    str = replace(str, "###```\n```" => "```") # merge the marked closing fence and the fence line right after it into one fence
+    str # the rewritten string is the return value
+end
+
 for example in examples
     example_filepath = joinpath(EXAMPLES_DIR, example)
-    Literate.markdown(example_filepath, OUTPUT_DIR; execute=true)
+    Literate.markdown(example_filepath, OUTPUT_DIR; execute=true, postprocess = uncomment_objects)
 end
 
 makedocs(
             doctest = false,
-            format = Documenter.HTML(assets = [asset("https://cdn.plot.ly/plotly-1.54.7.js")]),
+            format = Documenter.HTML(),
             sitename = "FastTransforms.jl",
             authors = "Richard Mikael Slevinsky",
             pages = Any[
diff --git a/examples/disk.jl b/examples/disk.jl
index e1a2f515..916afbf7 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -20,6 +20,8 @@
 # [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
 
 using FastTransforms, LinearAlgebra, Plots
+const GENFIGS = joinpath(dirname(dirname(pathof(FastTransforms))), "docs/src/generated")
+!isdir(GENFIGS) && mkdir(GENFIGS)
 plotlyjs()
 
 # Our function $f$ on the disk:
@@ -41,8 +43,12 @@ F = [f(r*cospi(θ), r*sinpi(θ)) for r in r, θ in θ]
 # We superpose a surface plot of $f$ on top of the grid:
 X = [r*cospi(θ) for r in r, θ in θ]
 Y = [r*sinpi(θ) for r in r, θ in θ]
-scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:red, size=(1000, 1000))
+scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:red, size=(800, 600))
 surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
+savefig(joinpath(GENFIGS, "zernike.html"))
+###```@raw html
+###<object type="text/html" data="../zernike.html" style="width:100%;height:600px;"></object>
+###```
 
 # We precompute a (generalized) Zernike--Chebyshev×Fourier plan:
 α, β = 0, 0
@@ -95,8 +101,12 @@ F = [f(x[n], w[n]*z) for n in 1:N, z in z]
 # We superpose a surface plot of $f$ on top of the grid:
 X = [x for x in x, z in z]
 Y = [w*z for w in w, z in z]
-scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:green, size=(1000, 1000))
+scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:green, size=(800, 600))
 surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
+savefig(joinpath(GENFIGS, "dunklxu.html"))
+###```@raw html
+###<object type="text/html" data="../dunklxu.html" style="width:100%;height:600px;"></object>
+###```
 
 # We precompute a Dunkl-Xu--Chebyshev plan:
 P = plan_rectdisk2cheb(F, β)

From aa5973618e67ebcb2fe5fa709eda974f32962800 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 26 Nov 2020 14:25:20 -0600
Subject: [PATCH 047/222] add Proriol surface & grid plot

---
 examples/triangle.jl | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/examples/triangle.jl b/examples/triangle.jl
index 8b2afff5..bc708d9e 100644
--- a/examples/triangle.jl
+++ b/examples/triangle.jl
@@ -23,7 +23,10 @@
 # For the storage pattern of the arrays, please consult the
 # [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
 
-using FastTransforms, LinearAlgebra
+using FastTransforms, LinearAlgebra, Plots
+const GENFIGS = joinpath(dirname(dirname(pathof(FastTransforms))), "docs/src/generated")
+!isdir(GENFIGS) && mkdir(GENFIGS)
+plotlyjs()
 
 # Our function $f$ and the Cartesian components of its gradient:
 f = (x,y) -> 1/(1+x^2+y^2)
@@ -31,7 +34,7 @@ fx = (x,y) -> -2x/(1+x^2+y^2)^2
 fy = (x,y) -> -2y/(1+x^2+y^2)^2
 
 # The polynomial degree:
-N = 10
+N = 15
 M = N
 
 # The parameters of the Proriol series:
@@ -56,6 +59,16 @@ w = [sinpi((2M-2m-1)/(4M))^2 for m in 0:M-1]
 # On the mapped tensor product grid, our function samples are:
 F = [f(x[n+1], x[N-n]*w[m+1]) for n in 0:N-1, m in 0:M-1]
 
+# We superpose a surface plot of $f$ on top of the grid:
+X = [x for x in x, w in w]
+Y = [x[N-n]*w[m+1] for n in 0:N-1, m in 0:M-1]
+scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:green, size=(800, 600))
+surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
+savefig(joinpath(GENFIGS, "proriol.html"))
+###```@raw html
+###<object type="text/html" data="../proriol.html" style="width:100%;height:600px;"></object>
+###```
+
 # We precompute a Proriol--Chebyshev² plan:
 P = plan_tri2cheb(F, α, β, γ)
 

From 753531dadffc481ded799274d30a74cfcf7880f9 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 26 Nov 2020 14:42:13 -0600
Subject: [PATCH 048/222] use the standard plot heights

---
 examples/disk.jl     | 8 ++++----
 examples/triangle.jl | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/examples/disk.jl b/examples/disk.jl
index 916afbf7..cd83a9a5 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -43,11 +43,11 @@ F = [f(r*cospi(θ), r*sinpi(θ)) for r in r, θ in θ]
 # We superpose a surface plot of $f$ on top of the grid:
 X = [r*cospi(θ) for r in r, θ in θ]
 Y = [r*sinpi(θ) for r in r, θ in θ]
-scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:red, size=(800, 600))
+scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:red)
 surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
 savefig(joinpath(GENFIGS, "zernike.html"))
 ###```@raw html
-###<object type="text/html" data="../zernike.html" style="width:100%;height:600px;"></object>
+###<object type="text/html" data="../zernike.html" style="width:100%;height:400px;"></object>
 ###```
 
 # We precompute a (generalized) Zernike--Chebyshev×Fourier plan:
@@ -101,11 +101,11 @@ F = [f(x[n], w[n]*z) for n in 1:N, z in z]
 # We superpose a surface plot of $f$ on top of the grid:
 X = [x for x in x, z in z]
 Y = [w*z for w in w, z in z]
-scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:green, size=(800, 600))
+scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:green)
 surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
 savefig(joinpath(GENFIGS, "dunklxu.html"))
 ###```@raw html
-###<object type="text/html" data="../dunklxu.html" style="width:100%;height:600px;"></object>
+###<object type="text/html" data="../dunklxu.html" style="width:100%;height:400px;"></object>
 ###```
 
 # We precompute a Dunkl-Xu--Chebyshev plan:
diff --git a/examples/triangle.jl b/examples/triangle.jl
index bc708d9e..cb4fbbae 100644
--- a/examples/triangle.jl
+++ b/examples/triangle.jl
@@ -62,11 +62,11 @@ F = [f(x[n+1], x[N-n]*w[m+1]) for n in 0:N-1, m in 0:M-1]
 # We superpose a surface plot of $f$ on top of the grid:
 X = [x for x in x, w in w]
 Y = [x[N-n]*w[m+1] for n in 0:N-1, m in 0:M-1]
-scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:green, size=(800, 600))
+scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:blue)
 surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
 savefig(joinpath(GENFIGS, "proriol.html"))
 ###```@raw html
-###<object type="text/html" data="../proriol.html" style="width:100%;height:600px;"></object>
+###<object type="text/html" data="../proriol.html" style="width:100%;height:400px;"></object>
 ###```
 
 # We precompute a Proriol--Chebyshev² plan:

From cfae853efa3a6bb4d2fea75f9700bcc62b2d60ca Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 27 Nov 2020 12:39:01 -0600
Subject: [PATCH 049/222] Update sphere.jl

---
 examples/sphere.jl | 97 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 70 insertions(+), 27 deletions(-)

diff --git a/examples/sphere.jl b/examples/sphere.jl
index 1044e070..38329493 100644
--- a/examples/sphere.jl
+++ b/examples/sphere.jl
@@ -1,11 +1,11 @@
 # # Spherical harmonic addition theorem
 # This example confirms numerically that
 # ```math
-# f(z) = \frac{P_4(z\cdot y) - P_4(x\cdot y)}{z\cdot y - x\cdot y},
+# f(z) = \frac{P_n(z\cdot y) - P_n(x\cdot y)}{z\cdot y - x\cdot y},
 # ```
-# is actually a degree-$3$ polynomial on $\mathbb{S}^2$, where $P_4$ is the degree-$4$
+# is actually a degree-$(n-1)$ polynomial on $\mathbb{S}^2$, where $P_n$ is the degree-$n$
 # Legendre polynomial, and $x,y,z \in \mathbb{S}^2$.
-# To verify, we sample the function on a $5\times9$ equiangular grid
+# To verify, we sample the function on an $N\times M$ equiangular grid
 # defined by:
 # ```math
 # \begin{aligned}
@@ -19,8 +19,8 @@
 # `plan_sph2fourier`.
 #
 # In the basis of spherical harmonics, it is plain to see the
-# addition theorem in action, since $P_4(x\cdot y)$ should only consist of
-# exact-degree-$4$ harmonics.
+# addition theorem in action, since $P_n(x\cdot y)$ should only consist of
+# exact-degree-$n$ harmonics.
 #
 # For the storage pattern of the arrays, please consult the
 # [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
@@ -32,10 +32,13 @@ function threshold!(A::AbstractArray, ϵ)
     A
 end
 
-using FastTransforms, LinearAlgebra
+using FastTransforms, LinearAlgebra, Plots
+const GENFIGS = joinpath(dirname(dirname(pathof(FastTransforms))), "docs/src/generated")
+!isdir(GENFIGS) && mkdir(GENFIGS)
+plotlyjs()
 
 # The colatitudinal grid (mod $\pi$):
-N = 5
+N = 15
 θ = (0.5:N-0.5)/N
 
 # The longitudinal grid (mod $\pi$):
@@ -51,45 +54,85 @@ y = normalize([.123,.456,.789])
 # Thus $z \in \mathbb{S}^2$ is our variable vector, parameterized in spherical coordinates:
 z = (θ,φ) -> [sinpi(θ)*cospi(φ), sinpi(θ)*sinpi(φ), cospi(θ)]
 
-# The degree-$4$ Legendre polynomial is:
-P4 = x -> (35*x^4-30*x^2+3)/8
-
-# On the tensor product grid, our function samples are:
-F = [(P4(z(θ,φ)⋅y) - P4(x⋅y))/(z(θ,φ)⋅y - x⋅y) for θ in θ, φ in φ]
-
+# On the tensor product grid, the Legendre polynomial $P_n(z\cdot y)$, with $n = N-1$, is:
+A = [(2k+1)/(k+1) for k in 0:N-1]
+B = zeros(N)
+C = [k/(k+1) for k in 0:N]
+c = zeros(N); c[N] = 1
+pts = vec([z(θ, φ)⋅y for θ in θ, φ in φ])
+phi0 = ones(N*M)
+F = reshape(FastTransforms.clenshaw!(c, A, B, C, pts, phi0, zeros(N*M)), N, M)
+
+# We superpose a surface plot of $P_n(z\cdot y)$ on top of the grid:
+X = [sinpi(θ)*cospi(φ) for θ in θ, φ in φ]
+Y = [sinpi(θ)*sinpi(φ) for θ in θ, φ in φ]
+Z = [cospi(θ) for θ in θ, φ in φ]
+scatter3d(vec(X), vec(Y), vec(Z); markersize=1.25, markercolor=:violetred)
+surface!(X, Y, Z; surfacecolor=F, legend=false, xlabel="x", ylabel="y", zlabel="f")
+savefig(joinpath(GENFIGS, "sphere1.html"))
+###```@raw html
+###<object type="text/html" data="../sphere1.html" style="width:100%;height:400px;"></object>
+###```
+
+# We show the cut in the surface to help illustrate the definition of the grid.
+# In particular, we do not sample the poles.
+#
 # We precompute a spherical harmonic--Fourier plan:
 P = plan_sph2fourier(F)
 
 # And an FFTW Fourier analysis plan on $\mathbb{S}^2$:
 PA = plan_sph_analysis(F)
 
-# Its spherical harmonic coefficients demonstrate that it is degree-$3$:
+# Its spherical harmonic coefficients demonstrate that it is exact-degree-$n$:
 V = PA*F
-U3 = threshold!(P\V, 400*eps())
+U = threshold!(P\V, 400*eps())
+
+# The $L^2(\mathbb{S}^2)$ norm of the function is:
+nrm1 = norm(U)
 
-# Similarly, on the tensor product grid, the Legendre polynomial $P_4(z\cdot y)$ is:
-F = [P4(z(θ,φ)⋅y) for θ in θ, φ in φ]
+# Similarly, on the tensor product grid, our function samples are:
+Pnxy = FastTransforms.clenshaw!(c, A, B, C, [x⋅y], [1.0], [0.0])[1]
+F = [(F[n, m] - Pnxy)/(z(θ[n], φ[m])⋅y - x⋅y) for n in 1:N, m in 1:M]
 
-# Its spherical harmonic coefficients demonstrate that it is exact-degree-$4$:
+# We superpose a surface plot of $f$ on top of the grid:
+scatter3d(vec(X), vec(Y), vec(Z); markersize=1.25, markercolor=:violetred)
+surface!(X, Y, Z; surfacecolor=F, legend=false, xlabel="x", ylabel="y", zlabel="f")
+savefig(joinpath(GENFIGS, "sphere2.html"))
+###```@raw html
+###<object type="text/html" data="../sphere2.html" style="width:100%;height:400px;"></object>
+###```
+
+# Its spherical harmonic coefficients demonstrate that it is degree-$(n-1)$:
 V = PA*F
-U4 = threshold!(P\V, 3*eps())
+U = threshold!(P\V, 400*eps())
 
-# The $L^2(\mathbb{S}^2)$ norm of the function is:
-nrm1 = norm(U4)
+# Finally, the Legendre polynomial $P_n(z\cdot x)$ is aligned with the grid:
+pts = vec([z(θ, φ)⋅x for θ in θ, φ in φ])
+F = reshape(FastTransforms.clenshaw!(c, A, B, C, pts, phi0, zeros(N*M)), N, M)
 
-# Finally, the Legendre polynomial $P_4(z\cdot x)$ is aligned with the grid:
-F = [P4(z(θ,φ)⋅x) for θ in θ, φ in φ]
+# We superpose a surface plot of $P_n(z\cdot x)$ on top of the grid:
+scatter3d(vec(X), vec(Y), vec(Z); markersize=1.25, markercolor=:violetred)
+surface!(X, Y, Z; surfacecolor=F, legend=false, xlabel="x", ylabel="y", zlabel="f")
+savefig(joinpath(GENFIGS, "sphere3.html"))
+###```@raw html
+###<object type="text/html" data="../sphere3.html" style="width:100%;height:400px;"></object>
+###```
 
 # It only has one nonnegligible spherical harmonic coefficient.
 # Can you spot it?
 V = PA*F
-U4 = threshold!(P\V, 3*eps())
+U = threshold!(P\V, 400*eps())
+
+# That nonnegligible coefficient should be
+ret = eval("√(2π/($(N-1)+1/2))")
+
+# which is approximately
+eval(Meta.parse(ret))
 
-# That nonnegligible coefficient should be approximately `√(2π/(4+1/2))`,
 # since the convention in this library is to orthonormalize.
-nrm2 = norm(U4)
+nrm2 = norm(U)
 
-# Note that the integrals of both functions $P_4(z\cdot y)$ and $P_4(z\cdot x)$ and their
+# Note that the integrals of both functions $P_n(z\cdot y)$ and $P_n(z\cdot x)$ and their
 # $L^2(\mathbb{S}^2)$ norms are the same because of rotational invariance. The integral of
 # either is perhaps not interesting as it is mathematically zero, but the norms
 # of either should be approximately the same.

From acfefb6b6a0045501a2cd2907c38ffc844e110f8 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 4 Dec 2020 09:13:25 -0600
Subject: [PATCH 050/222] add associated jac2jac, update references

---
 Project.toml                   |   4 +-
 README.md                      |   8 ++-
 src/FastTransforms.jl          |   4 +-
 src/libfasttransforms.jl       | 102 +++++++++++++++++++++++++--------
 test/libfasttransformstests.jl |  10 ++++
 5 files changed, 98 insertions(+), 30 deletions(-)

diff --git a/Project.toml b/Project.toml
index 190d423a..328dbbb9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.11.0"
+version = "0.11.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -25,7 +25,7 @@ BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.4.0"
+FastTransforms_jll = "0.4.1"
 FillArrays = "0.9, 0.10"
 Reexport = "0.2"
 SpecialFunctions = "0.10, 1"
diff --git a/README.md b/README.md
index 025ffd38..d0058f7c 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ julia> using FastTransforms, LinearAlgebra
 
 ## Fast orthogonal polynomial transforms
 
-The 33 orthogonal polynomial transforms are listed in `FastTransforms.kind2string.(0:32)`. Univariate transforms may be planned with the standard normalization or with orthonormalization. For multivariate transforms, the standard normalization may be too severe for floating-point computations, so it is omitted. Here are two examples:
+The 34 orthogonal polynomial transforms are listed in `FastTransforms.kind2string.(0:33)`. Univariate transforms may be planned with the standard normalization or with orthonormalization. For multivariate transforms, the standard normalization may be too severe for floating-point computations, so it is omitted. Here are two examples:
 
 ### The Chebyshev--Legendre transform
 
@@ -159,6 +159,8 @@ julia> @time norm(ipaduatransform(paduatransform(v)) - v)/norm(v)
 
    [1]  D. Ruiz—Antolín and A. Townsend. <a href="https://doi.org/10.1137/17M1134822">A nonuniform fast Fourier transform based on low rank approximation</a>, *SIAM J. Sci. Comput.*, **40**:A529–A547, 2018.
 
-   [2]  R. M. Slevinsky. <a href="https://doi.org/10.1016/j.acha.2017.11.001">Fast and backward stable transforms between spherical harmonic expansions and bivariate Fourier series</a>, *Appl. Comput. Harmon. Anal.*, **47**:585—606, 2019.
+   [2] S. Olver, R. M. Slevinsky, and A. Townsend. <a href="https://doi.org/10.1017/S0962492920000045">Fast algorithms using orthogonal polynomials</a>, *Acta Numerica*, **29**:573—699, 2020.
 
-   [3]  R. M. Slevinsky, <a href="https://arxiv.org/abs/1711.07866">Conquering the pre-computation in two-dimensional harmonic polynomial transforms</a>, arXiv:1711.07866, 2017.
+   [3]  R. M. Slevinsky. <a href="https://doi.org/10.1016/j.acha.2017.11.001">Fast and backward stable transforms between spherical harmonic expansions and bivariate Fourier series</a>, *Appl. Comput. Harmon. Anal.*, **47**:585—606, 2019.
+
+   [4]  R. M. Slevinsky, <a href="https://arxiv.org/abs/1711.07866">Conquering the pre-computation in two-dimensional harmonic polynomial transforms</a>, arXiv:1711.07866, 2017.
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index c0817a6a..3a382be0 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -34,13 +34,13 @@ import LinearAlgebra: mul!, lmul!, ldiv!
 
 export leg2cheb, cheb2leg, ultra2ultra, jac2jac,
        lag2lag, jac2ultra, ultra2jac, jac2cheb,
-       cheb2jac, ultra2cheb, cheb2ultra,
+       cheb2jac, ultra2cheb, cheb2ultra, associatedjac2jac,
        sph2fourier, sphv2fourier, disk2cxf, rectdisk2cheb, tri2cheb, tet2cheb,
        fourier2sph, fourier2sphv, cxf2disk, cheb2rectdisk, cheb2tri, cheb2tet
 
 export plan_leg2cheb, plan_cheb2leg, plan_ultra2ultra, plan_jac2jac,
        plan_lag2lag, plan_jac2ultra, plan_ultra2jac, plan_jac2cheb,
-       plan_cheb2jac, plan_ultra2cheb, plan_cheb2ultra,
+       plan_cheb2jac, plan_ultra2cheb, plan_cheb2ultra, plan_associatedjac2jac,
        plan_sph2fourier, plan_sph_synthesis, plan_sph_analysis,
        plan_sphv2fourier, plan_sphv_synthesis, plan_sphv_analysis,
        plan_disk2cxf, plan_disk_synthesis, plan_disk_analysis,
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index c2cb8e65..bc8cd3fc 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -116,28 +116,29 @@ const JAC2CHEB              = 7
 const CHEB2JAC              = 8
 const ULTRA2CHEB            = 9
 const CHEB2ULTRA           = 10
-const SPHERE               = 11
-const SPHEREV              = 12
-const DISK                 = 13
-const RECTDISK             = 14
-const TRIANGLE             = 15
-const TETRAHEDRON          = 16
-const SPINSPHERE           = 17
-const SPHERESYNTHESIS      = 18
-const SPHEREANALYSIS       = 19
-const SPHEREVSYNTHESIS     = 20
-const SPHEREVANALYSIS      = 21
-const DISKSYNTHESIS        = 22
-const DISKANALYSIS         = 23
-const RECTDISKSYNTHESIS    = 24
-const RECTDISKANALYSIS     = 25
-const TRIANGLESYNTHESIS    = 26
-const TRIANGLEANALYSIS     = 27
-const TETRAHEDRONSYNTHESIS = 28
-const TETRAHEDRONANALYSIS  = 29
-const SPINSPHERESYNTHESIS  = 30
-const SPINSPHEREANALYSIS   = 31
-const SPHERICALISOMETRY    = 32
+const ASSOCIATEDJAC2JAC    = 11
+const SPHERE               = 12
+const SPHEREV              = 13
+const DISK                 = 14
+const RECTDISK             = 15
+const TRIANGLE             = 16
+const TETRAHEDRON          = 17
+const SPINSPHERE           = 18
+const SPHERESYNTHESIS      = 19
+const SPHEREANALYSIS       = 20
+const SPHEREVSYNTHESIS     = 21
+const SPHEREVANALYSIS      = 22
+const DISKSYNTHESIS        = 23
+const DISKANALYSIS         = 24
+const RECTDISKSYNTHESIS    = 25
+const RECTDISKANALYSIS     = 26
+const TRIANGLESYNTHESIS    = 27
+const TRIANGLEANALYSIS     = 28
+const TETRAHEDRONSYNTHESIS = 29
+const TETRAHEDRONANALYSIS  = 30
+const SPINSPHERESYNTHESIS  = 31
+const SPINSPHEREANALYSIS   = 32
+const SPHERICALISOMETRY    = 33
 
 
 let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
@@ -151,6 +152,7 @@ let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                CHEB2JAC             => "Chebyshev--Jacobi",
                ULTRA2CHEB           => "ultraspherical--Chebyshev",
                CHEB2ULTRA           => "Chebyshev--ultraspherical",
+               ASSOCIATEDJAC2JAC    => "Associated Jacobi--Jacobi",
                SPHERE               => "Spherical harmonic--Fourier",
                SPHEREV              => "Spherical vector field--Fourier",
                DISK                 => "Zernike--Chebyshev×Fourier",
@@ -244,6 +246,8 @@ unsafe_convert(::Type{Ptr{mpfr_t}}, p::FTPlan) = unsafe_convert(Ptr{mpfr_t}, p.p
 destroy_plan(p::FTPlan{Float32, 1}) = ccall((:ft_destroy_tb_eigen_FMMf, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 1}) = ccall((:ft_destroy_tb_eigen_FMM, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{BigFloat, 1}) = ccall((:ft_mpfr_destroy_plan, libfasttransforms), Cvoid, (Ptr{mpfr_t}, Cint), p, p.n)
+destroy_plan(p::FTPlan{Float32, 1, ASSOCIATEDJAC2JAC}) = ccall((:ft_destroy_btb_eigen_FMMf, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float64, 1, ASSOCIATEDJAC2JAC}) = ccall((:ft_destroy_btb_eigen_FMM, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2}) = ccall((:ft_destroy_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 3}) = ccall((:ft_destroy_tetrahedral_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHERE}) = ccall((:ft_destroy_spin_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
@@ -307,7 +311,7 @@ unsafe_convert(::Type{Ptr{mpfr_t}}, p::TransposeFTPlan{T, FTPlan{T, N, K}}) wher
 
 for f in (:leg2cheb, :cheb2leg, :ultra2ultra, :jac2jac,
           :lag2lag, :jac2ultra, :ultra2jac, :jac2cheb,
-          :cheb2jac, :ultra2cheb, :cheb2ultra,
+          :cheb2jac, :ultra2cheb, :cheb2ultra, :associatedjac2jac,
           :sph2fourier, :sphv2fourier, :disk2cxf,
           :rectdisk2cheb, :tri2cheb, :tet2cheb)
     plan_f = Symbol("plan_", f)
@@ -385,6 +389,11 @@ function plan_cheb2ultra(::Type{Float32}, n::Integer, λ; normcheb::Bool=false,
     return FTPlan{Float32, 1, CHEB2ULTRA}(plan, n)
 end
 
+function plan_associatedjac2jac(::Type{Float32}, n::Integer, c::Integer, α, β, γ, δ; norm1::Bool=false, norm2::Bool=false)
+    plan = ccall((:ft_plan_associated_jacobi_to_jacobif, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint, Cint, Float32, Float32, Float32, Float32), norm1, norm2, n, c, α, β, γ, δ)
+    return FTPlan{Float32, 1, ASSOCIATEDJAC2JAC}(plan, n)
+end
+
 
 function plan_leg2cheb(::Type{Float64}, n::Integer; normleg::Bool=false, normcheb::Bool=false)
     plan = ccall((:ft_plan_legendre_to_chebyshev, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), normleg, normcheb, n)
@@ -441,6 +450,11 @@ function plan_cheb2ultra(::Type{Float64}, n::Integer, λ; normcheb::Bool=false,
     return FTPlan{Float64, 1, CHEB2ULTRA}(plan, n)
 end
 
+function plan_associatedjac2jac(::Type{Float64}, n::Integer, c::Integer, α, β, γ, δ; norm1::Bool=false, norm2::Bool=false)
+    plan = ccall((:ft_plan_associated_jacobi_to_jacobi, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint, Cint, Float64, Float64, Float64, Float64), norm1, norm2, n, c, α, β, γ, δ)
+    return FTPlan{Float64, 1, ASSOCIATEDJAC2JAC}(plan, n)
+end
+
 
 function plan_leg2cheb(::Type{BigFloat}, n::Integer; normleg::Bool=false, normcheb::Bool=false)
     plan = ccall((:ft_mpfr_plan_legendre_to_chebyshev, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint, Clong, Int32), normleg, normcheb, n, precision(BigFloat), Base.MPFR.ROUNDING_MODE[])
@@ -664,6 +678,27 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bfmvf, :Float32),
     end
 end
 
+for (fJ, fC, elty) in ((:lmul!, :ft_bbbfmvf, :Float32),
+                       (:lmul!, :ft_bbbfmv , :Float64))
+    @eval begin
+        function $fJ(p::FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}, x::Vector{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'N', '2', '1', p, x)
+            return x
+        end
+        function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::Vector{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', '1', '2', p, x)
+            return x
+        end
+        function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::Vector{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', '1', '2', p, x)
+            return x
+        end
+    end
+end
+
 for (fJ, fC) in ((:lmul!, :ft_mpfr_trmv_ptr),
                  (:ldiv!, :ft_mpfr_trsv_ptr))
     @eval begin
@@ -708,6 +743,27 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bfmmf, :Float32),
     end
 end
 
+for (fJ, fC, elty) in ((:lmul!, :ft_bbbfmmf, :Float32),
+                       (:lmul!, :ft_bbbfmm , :Float64))
+    @eval begin
+        function $fJ(p::FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}, x::Matrix{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', '2', '1', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::Matrix{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', '1', '2', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::Matrix{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', '1', '2', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+    end
+end
+
 for (fJ, fC) in ((:lmul!, :ft_mpfr_trmm_ptr),
                  (:ldiv!, :ft_mpfr_trsm_ptr))
     @eval begin
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index a8f7be77..79a6fd5e 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -75,6 +75,16 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         end
     end
 
+    for T in (Float32, Float64, Complex{Float32}, Complex{Float64})
+        x = T(1)./(1:n)
+        Id = Matrix{T}(I, n, n)
+        p = plan_associatedjac2jac(Id, 1, α, β, γ, δ)
+        V = p*I
+        @test V ≈ p*Id
+        y = p*x
+        @test V\y ≈ x
+    end
+
     function test_nd_plans(p, ps, pa, A)
         B = copy(A)
         C = ps*(p*A)

From 44cb3d60e2434c3af51117725e83eaa93f2d070f Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Dec 2020 12:12:08 -0600
Subject: [PATCH 051/222] new example on subspace angles of Laguerre
 polynomials with a perturbed measure

---
 docs/make.jl               |  2 ++
 examples/spinweighted.jl   |  1 -
 examples/subspaceangles.jl | 29 +++++++++++++++++++++++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 examples/subspaceangles.jl

diff --git a/docs/make.jl b/docs/make.jl
index f79256cf..2462159b 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -12,6 +12,7 @@ examples = [
     "padua.jl",
     "sphere.jl",
     "spinweighted.jl",
+    "subspaceangles.jl",
     "triangle.jl",
 ]
 
@@ -41,6 +42,7 @@ makedocs(
                         "generated/padua.md",
                         "generated/sphere.md",
                         "generated/spinweighted.md",
+                        "generated/subspaceangles.md",
                         "generated/triangle.md",
                         ],
                     ]
diff --git a/examples/spinweighted.jl b/examples/spinweighted.jl
index 2720543e..6e8e8656 100644
--- a/examples/spinweighted.jl
+++ b/examples/spinweighted.jl
@@ -27,7 +27,6 @@ k = [2/7, 3/7, 6/7]
 r = (θ,φ) -> [sinpi(θ)*cospi(φ), sinpi(θ)*sinpi(φ), cospi(θ)]
 
 # On the tensor product grid, our function samples are:
-
 F = [exp(im*(k⋅r(θ,φ))) for θ in θ, φ in φ]
 
 # We precompute a spin-$0$ spherical harmonic--Fourier plan:
diff --git a/examples/subspaceangles.jl b/examples/subspaceangles.jl
new file mode 100644
index 00000000..de8aa0c5
--- /dev/null
+++ b/examples/subspaceangles.jl
@@ -0,0 +1,29 @@
+# # Subspace angles
+# This example considers the angles between neighbouring Laguerre polynomials with a perturbed measure:
+# ```math
+# \cos\theta_n = \dfrac{\langle L_n, L_{n+k}\rangle}{\|L_n|_2 \|L_{n+k}\|_2},\quad{\rm for}\quad 0\le n < N-k,
+# ```
+# where the inner product is defined by $\langle f, g\rangle = \int_0^\infty f(x) g(x) x^\beta e^{-x}{\rm\,d}x$.
+#
+# We do so by connecting Laguerre polynomials to the normalized generalized Laguerre polynomials associated with the perturbed measure. It follows by the inner product of the connection coefficients that:
+# ```math
+# \cos\theta_n = \dfrac{(V^\top V)_{n, n+k}}{\sqrt{(V^\top V)_{n, n}(V^\top V)_{n+k, n+k}}}.
+# ```
+#
+using FastTransforms, LinearAlgebra
+
+# The neighbouring index `k` and the maximum degree `N-1`:
+k, N = 1, 11
+
+# The Laguerre connection parameters:
+α, β = 0.0, 0.5
+
+# We precompute a Laguerre--Laguerre plan:
+P = plan_lag2lag(Float64, N, α, β; norm2=true)
+
+# We apply the plan to the identity, followed by the adjoint plan:
+VtV = P*I
+lmul!(P', VtV)
+
+# The angles are then recovered from this matrix:
+θ = [acos(VtV[n, n+k]/sqrt(VtV[n, n]*VtV[n+k, n+k])) for n in 1:N-k]

From d6309ab5759b4fc61492194b9191030df58e250d Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Dec 2020 12:44:45 -0600
Subject: [PATCH 052/222] fix typo

---
 examples/subspaceangles.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/subspaceangles.jl b/examples/subspaceangles.jl
index de8aa0c5..daeff5e4 100644
--- a/examples/subspaceangles.jl
+++ b/examples/subspaceangles.jl
@@ -16,7 +16,7 @@ using FastTransforms, LinearAlgebra
 k, N = 1, 11
 
 # The Laguerre connection parameters:
-α, β = 0.0, 0.5
+α, β = 0.0, 0.125
 
 # We precompute a Laguerre--Laguerre plan:
 P = plan_lag2lag(Float64, N, α, β; norm2=true)

From 3d0fb91fac2e3758fb211ee01e445a20a8bb03bd Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Dec 2020 13:06:51 -0600
Subject: [PATCH 053/222] add julia-buildpkg to docs tests

---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 223528e7..276e9b79 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -52,6 +52,7 @@ jobs:
       - uses: julia-actions/setup-julia@v1
         with:
           version: '1.5'
+      - uses: julia-actions/julia-buildpkg@latest
       - run: |
           julia --project=docs -e '
             using Pkg

From c02f11a80237c8d44ed85d6701eff221d5d17944 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Dec 2020 13:21:24 -0600
Subject: [PATCH 054/222] clone registry just before developing project

---
 .github/workflows/ci.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 276e9b79..61c1428c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -52,10 +52,11 @@ jobs:
       - uses: julia-actions/setup-julia@v1
         with:
           version: '1.5'
-      - uses: julia-actions/julia-buildpkg@latest
       - run: |
           julia --project=docs -e '
             using Pkg
+            VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General"))
+            Pkg.Registry.add("General")
             Pkg.develop(PackageSpec(path=pwd()))
             Pkg.instantiate()'
       - run: julia --project=docs docs/make.jl

From fc1e662fb0c9eb8eeb4ce64deb4a41365bcfad86 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Dec 2020 13:25:13 -0600
Subject: [PATCH 055/222] fix the chained conditionals

---
 .github/workflows/ci.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 61c1428c..10868b19 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -55,8 +55,7 @@ jobs:
       - run: |
           julia --project=docs -e '
             using Pkg
-            VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General"))
-            Pkg.Registry.add("General")
+            VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")
             Pkg.develop(PackageSpec(path=pwd()))
             Pkg.instantiate()'
       - run: julia --project=docs docs/make.jl

From 7c4c82fe0bf1cb2a16cfe85a8f9d2b888876575e Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 7 Dec 2020 13:30:11 -0600
Subject: [PATCH 056/222] remove the code that doesn't work

---
 .github/workflows/ci.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 10868b19..223528e7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -55,7 +55,6 @@ jobs:
       - run: |
           julia --project=docs -e '
             using Pkg
-            VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")
             Pkg.develop(PackageSpec(path=pwd()))
             Pkg.instantiate()'
       - run: julia --project=docs docs/make.jl

From 39566adf79f7e603aaa3974a0d17af0357267f8b Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Tue, 8 Dec 2020 11:37:01 -0600
Subject: [PATCH 057/222] fix typo

---
 examples/subspaceangles.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/subspaceangles.jl b/examples/subspaceangles.jl
index daeff5e4..d3c19a94 100644
--- a/examples/subspaceangles.jl
+++ b/examples/subspaceangles.jl
@@ -1,13 +1,13 @@
 # # Subspace angles
 # This example considers the angles between neighbouring Laguerre polynomials with a perturbed measure:
 # ```math
-# \cos\theta_n = \dfrac{\langle L_n, L_{n+k}\rangle}{\|L_n|_2 \|L_{n+k}\|_2},\quad{\rm for}\quad 0\le n < N-k,
+# \cos\theta_n = \frac{\langle L_n, L_{n+k}\rangle}{\|L_n\|_2 \|L_{n+k}\|_2},\quad{\rm for}\quad 0\le n < N-k,
 # ```
 # where the inner product is defined by $\langle f, g\rangle = \int_0^\infty f(x) g(x) x^\beta e^{-x}{\rm\,d}x$.
 #
 # We do so by connecting Laguerre polynomials to the normalized generalized Laguerre polynomials associated with the perturbed measure. It follows by the inner product of the connection coefficients that:
 # ```math
-# \cos\theta_n = \dfrac{(V^\top V)_{n, n+k}}{\sqrt{(V^\top V)_{n, n}(V^\top V)_{n+k, n+k}}}.
+# \cos\theta_n = \frac{(V^\top V)_{n, n+k}}{\sqrt{(V^\top V)_{n, n}(V^\top V)_{n+k, n+k}}}.
 # ```
 #
 using FastTransforms, LinearAlgebra

From 8c30debdb2d52570a04a72196f50cce3c28156cd Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 26 Dec 2020 14:25:42 -0600
Subject: [PATCH 058/222] CompatHelper: bump compat for "AbstractFFTs" to "1.0"
 (#127)

* CompatHelper: bump compat for "AbstractFFTs" to "1.0"

* give compathelper an ssh key

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
---
 .github/workflows/CompatHelper.yml | 1 +
 Project.toml                       | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index d486cb1b..4b384d23 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -21,5 +21,6 @@ jobs:
       - name: CompatHelper.main
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
           JULIA_DEBUG: CompatHelper
         run: julia -e 'using CompatHelper; CompatHelper.main()'
diff --git a/Project.toml b/Project.toml
index 328dbbb9..df88e0b4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -19,7 +19,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
-AbstractFFTs = "0.5"
+AbstractFFTs = "0.5, 1.0"
 ArrayLayouts = "0.4"
 BinaryProvider = "0.5"
 DSP = "0.6"

From 68e381ce41fd28b8034273b6def04c242745d3a8 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 26 Dec 2020 14:25:57 -0600
Subject: [PATCH 059/222] CompatHelper: bump compat for "Reexport" to "1.0"
 (#128)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index df88e0b4..2c1f2c1f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -27,7 +27,7 @@ FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.4.1"
 FillArrays = "0.9, 0.10"
-Reexport = "0.2"
+Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1"
 ToeplitzMatrices = "0.6"
 julia = "1.3"

From 8625a440488c79e40355ab966bb805dc9650cfde Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Wed, 6 Jan 2021 15:13:16 +0000
Subject: [PATCH 060/222] Support new ArrayLayouts, FillArrays

---
 Project.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Project.toml b/Project.toml
index 2c1f2c1f..0a74b23d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.11.1"
+version = "0.11.2"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -20,13 +20,13 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "0.5, 1.0"
-ArrayLayouts = "0.4"
+ArrayLayouts = "0.4, 0.5"
 BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.4.1"
-FillArrays = "0.9, 0.10"
+FillArrays = "0.9, 0.10, 0.11"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1"
 ToeplitzMatrices = "0.6"

From a603718579644da85f6166e3d3280b2bc942d814 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 4 Feb 2021 12:58:27 -0600
Subject: [PATCH 061/222] add example for raw automatic differentiation without
 special types

#133
---
 docs/make.jl                         |  2 ++
 examples/automaticdifferentiation.jl | 43 ++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 examples/automaticdifferentiation.jl

diff --git a/docs/make.jl b/docs/make.jl
index 2462159b..534ff439 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -6,6 +6,7 @@ const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
 const OUTPUT_DIR   = joinpath(@__DIR__, "src/generated")
 
 examples = [
+    "automaticdifferentiation.jl",
     "chebyshev.jl",
     "disk.jl",
     "nonlocaldiffusion.jl",
@@ -36,6 +37,7 @@ makedocs(
             pages = Any[
                     "Home" => "index.md",
                     "Examples" => [
+                        "generated/automaticdifferentiation.md",
                         "generated/chebyshev.md",
                         "generated/disk.md",
                         "generated/nonlocaldiffusion.md",
diff --git a/examples/automaticdifferentiation.jl b/examples/automaticdifferentiation.jl
new file mode 100644
index 00000000..2c5f8f82
--- /dev/null
+++ b/examples/automaticdifferentiation.jl
@@ -0,0 +1,43 @@
+# # Automatic differentiation through spherical harmonic transforms
+# This example finds a positive value of $\lambda$ in:
+# ```math
+# f(r) = \sin[\lambda (k\cdot r)],
+# ```
+# for some $k,r\in\mathbb{S}^2$ such that $\int_{\mathbb{S}^2} f^2 {\rm\,d}\Omega = 1$.
+# We do this by using derivative information through:
+# ```math
+# \dfrac{\partial f}{\partial \lambda} = (k\cdot r) \cos[\lambda (k\cdot r)].
+# ```
+
+using FastTransforms, LinearAlgebra
+
+# The colatitudinal grid (mod $\pi$):
+N = 15
+θ = (0.5:N-0.5)/N
+
+# The longitudinal grid (mod $\pi$):
+M = 2*N-1
+φ = (0:M-1)*2/M
+
+# We precompute a spherical harmonic--Fourier plan:
+P = plan_sph2fourier(Float64, N)
+
+# And an FFTW Fourier analysis plan on $\mathbb{S}^2$:
+PA = plan_sph_analysis(Float64, N, M)
+
+# Our choice of $k$ and angular parametrization of $r$:
+k = [2/7, 3/7, 6/7]
+r = (θ,φ) -> [sinpi(θ)*cospi(φ), sinpi(θ)*sinpi(φ), cospi(θ)]
+
+# Our initial guess for $\lambda$:
+λ = 1.0
+
+# Then we run Newton iteration and grab an espresso:
+for _ in 1:7
+    F = [sin(λ*(k⋅r(θ,φ))) for θ in θ, φ in φ]
+    Fλ = [(k⋅r(θ,φ))*cos(λ*(k⋅r(θ,φ))) for θ in θ, φ in φ]
+    U = P\(PA*F)
+    Uλ = P\(PA*Fλ)
+    global λ = λ - (norm(U)^2-1)/(2*sum(U.*Uλ))
+    println("λ: $(rpad(λ, 18)) and the 2-norm: $(rpad(norm(U), 18))")
+end

From 1cf16aea396fd09b18c481364b3c7a69cc516ea7 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 5 Feb 2021 12:15:08 -0600
Subject: [PATCH 062/222] add region to generic plans

---
 Project.toml       |   2 +-
 src/fftBigFloat.jl | 172 ++++++++++++++++++++++++++++++---------------
 2 files changed, 117 insertions(+), 57 deletions(-)

diff --git a/Project.toml b/Project.toml
index 0a74b23d..ed99a1a4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.11.2"
+version = "0.11.3"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/fftBigFloat.jl b/src/fftBigFloat.jl
index e43cf056..169d7be3 100644
--- a/src/fftBigFloat.jl
+++ b/src/fftBigFloat.jl
@@ -8,6 +8,42 @@ const ComplexFloats = Complex{T} where T<:AbstractFloat
 # The following implements Bluestein's algorithm, following http://www.dsprelated.com/dspbooks/mdft/Bluestein_s_FFT_Algorithm.html
 # To add more types, add them in the union of the function's signature.
 
+function generic_fft(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
+    region == 1 || throw(ArgumentError("generic_fft of a vector requires region == 1, got $region")) # `ret` was undefined otherwise
+    generic_fft(x)
+end
+
+function generic_fft!(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
+    region == 1 ? (x[:] .= generic_fft(x)) : throw(ArgumentError("generic_fft! of a vector requires region == 1, got $region")) # no silent no-op
+    x
+end
+
+function generic_fft(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
+    region == 1:1 || throw(ArgumentError("generic_fft of a vector requires region == 1:1, got $region")) # `ret` was undefined otherwise
+    generic_fft(x)
+end
+
+function generic_fft!(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
+    region == 1:1 ? (x[:] .= generic_fft(x)) : throw(ArgumentError("generic_fft! of a vector requires region == 1:1, got $region")) # no silent no-op
+    x
+end
+
+function generic_fft(x::StridedMatrix{T}, region::Integer) where T<:AbstractFloats
+    if region != 1 # only column-wise transforms are implemented; `ret` used to be undefined on this path
+        throw(ArgumentError("generic_fft of a matrix is only implemented for region == 1, got $region"))
+    end
+    hcat([generic_fft(x[:, j]) for j in 1:size(x, 2)]...) # transform each column and reassemble
+end
+
+function generic_fft!(x::StridedMatrix{T}, region::Integer) where T<:AbstractFloats
+    # Only column-wise transforms are implemented; fail loudly rather than return x untransformed.
+    region == 1 || throw(ArgumentError("generic_fft! of a matrix is only implemented for region == 1, got $region"))
+    for j in 1:size(x, 2)
+        x[:, j] .= generic_fft(x[:, j])
+    end
+    x
+end
+
 function generic_fft(x::Vector{T}) where T<:AbstractFloats
     T <: FFTW.fftwNumber && (@warn("Using generic fft for FFTW number type."))
     n = length(x)
@@ -18,36 +54,20 @@ function generic_fft(x::Vector{T}) where T<:AbstractFloats
     return Wks.*conv(xq,wq)[n+1:2n]
 end
 
+generic_bfft(x::StridedArray{T, N}, region) where {T <: AbstractFloats, N} = conj!(generic_fft(conj(x), region))
+generic_bfft!(x::StridedArray{T, N}, region) where {T <: AbstractFloats, N} = conj!(generic_fft!(conj!(x), region))
+generic_ifft(x::StridedArray{T, N}, region) where {T<:AbstractFloats, N} = ldiv!(prod(d -> size(x, d), region), conj!(generic_fft(conj(x), region))) # normalize by the transformed dims only
+generic_ifft!(x::StridedArray{T, N}, region) where {T<:AbstractFloats, N} = ldiv!(prod(d -> size(x, d), region), conj!(generic_fft!(conj!(x), region))) # normalize by the transformed dims only
 
-function generic_fft!(x::Vector{T}) where T<:AbstractFloats
-    x[:] = generic_fft(x)
-    return x
-end
-
-# add rfft for AbstractFloat, by calling fft
-generic_rfft(v::Vector{T}) where T<:AbstractFloats = generic_fft(v)[1:div(length(v),2)+1]
-
-function generic_irfft(v::Vector{T}, n::Integer) where T<:ComplexFloats
+generic_rfft(v::Vector{T}, region) where T<:AbstractFloats = generic_fft(v, region)[1:div(length(v),2)+1]
+function generic_irfft(v::Vector{T}, n::Integer, region) where T<:ComplexFloats
     @assert n==2length(v)-1
     r = Vector{T}(undef, n)
     r[1:length(v)]=v
     r[length(v)+1:end]=reverse(conj(v[2:end]))
-    real(generic_ifft(r))
-end
-
-generic_bfft(x::Vector{T}) where {T <: AbstractFloats} = conj!(generic_fft(conj(x)))
-function generic_bfft!(x::Vector{T}) where {T <: AbstractFloats}
-    x[:] = generic_bfft(x)
-    return x
-end
-
-generic_brfft(v::Vector, n::Integer) = generic_irfft(v, n)*n
-
-generic_ifft(x::Vector{T}) where {T<:AbstractFloats} = conj!(generic_fft(conj(x)))/length(x)
-function generic_ifft!(x::Vector{T}) where T<:AbstractFloats
-    x[:] = generic_ifft(x)
-    return x
+    real(generic_ifft(r, region))
 end
+generic_brfft(v::StridedArray, n::Integer, region) = generic_irfft(v, n, region)*n
 
 function conv(u::StridedVector{T}, v::StridedVector{T}) where T<:AbstractFloats
     nu,nv = length(u),length(v)
@@ -112,6 +132,46 @@ function generic_ifft_pow2(x::Vector{Complex{T}}) where T<:AbstractFloat
     return complex.(y[1:2:end],-y[2:2:end])/length(x)
 end
 
+function generic_dct(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
+    region == 1 || throw(ArgumentError("generic_dct of a vector requires region == 1, got $region")) # `ret` was undefined otherwise
+    generic_dct(x)
+end
+
+function generic_dct!(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
+    region == 1 ? (x[:] .= generic_dct(x)) : throw(ArgumentError("generic_dct! of a vector requires region == 1, got $region")) # no silent no-op
+    x
+end
+
+function generic_idct(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
+    region == 1 || throw(ArgumentError("generic_idct of a vector requires region == 1, got $region")) # `ret` was undefined otherwise
+    generic_idct(x)
+end
+
+function generic_idct!(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
+    region == 1 ? (x[:] .= generic_idct(x)) : throw(ArgumentError("generic_idct! of a vector requires region == 1, got $region")) # no silent no-op
+    x
+end
+
+function generic_dct(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
+    region == 1:1 || throw(ArgumentError("generic_dct of a vector requires region == 1:1, got $region")) # `ret` was undefined otherwise
+    generic_dct(x)
+end
+
+function generic_dct!(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
+    region == 1:1 ? (x[:] .= generic_dct(x)) : throw(ArgumentError("generic_dct! of a vector requires region == 1:1, got $region")) # no silent no-op
+    x
+end
+
+function generic_idct(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
+    region == 1:1 || throw(ArgumentError("generic_idct of a vector requires region == 1:1, got $region")) # `ret` was undefined otherwise
+    generic_idct(x)
+end
+
+function generic_idct!(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
+    region == 1:1 ? (x[:] .= generic_idct(x)) : throw(ArgumentError("generic_idct! of a vector requires region == 1:1, got $region")) # no silent no-op
+    x
+end
+
 function generic_dct(a::AbstractVector{Complex{T}}) where {T <: AbstractFloat}
     T <: FFTW.fftwNumber && (@warn("Using generic dct for FFTW number type."))
     N = length(a)
@@ -139,8 +199,6 @@ end
 
 generic_idct(a::AbstractArray{T}) where {T <: AbstractFloat} = real(generic_idct(complex(a)))
 
-generic_dct!(a::AbstractArray{T}) where {T<:AbstractFloats} = (b = generic_dct(a); a[:] = b)
-generic_idct!(a::AbstractArray{T}) where {T<:AbstractFloats} = (b = generic_idct(a); a[:] = b)
 
 # These lines mimick the corresponding ones in FFTW/src/dct.jl, but with
 # AbstractFloat rather than fftwNumber.
@@ -157,18 +215,20 @@ abstract type DummyPlan{T} <: Plan{T} end
 for P in (:DummyFFTPlan, :DummyiFFTPlan, :DummybFFTPlan, :DummyDCTPlan, :DummyiDCTPlan)
     # All plans need an initially undefined pinv field
     @eval begin
-        mutable struct $P{T,inplace} <: DummyPlan{T}
+        mutable struct $P{T,inplace,G} <: DummyPlan{T}
+            region::G # region (iterable) of dims that are transformed
             pinv::DummyPlan{T}
-            $P{T,inplace}() where {T<:AbstractFloats, inplace} = new()
+            $P{T,inplace,G}(region::G) where {T<:AbstractFloats, inplace, G} = new(region)
         end
     end
 end
 for P in (:DummyrFFTPlan, :DummyirFFTPlan, :DummybrFFTPlan)
     @eval begin
-        mutable struct $P{T,inplace} <: DummyPlan{T}
+        mutable struct $P{T,inplace,G} <: DummyPlan{T}
             n::Integer
+            region::G # region (iterable) of dims that are transformed
             pinv::DummyPlan{T}
-            $P{T,inplace}(n::Integer) where {T<:AbstractFloats, inplace} = new(n)
+            $P{T,inplace,G}(n::Integer, region::G) where {T<:AbstractFloats, inplace, G} = new(n, region)
         end
     end
 end
@@ -176,14 +236,14 @@ end
 for (Plan,iPlan) in ((:DummyFFTPlan,:DummyiFFTPlan),
                      (:DummyDCTPlan,:DummyiDCTPlan))
    @eval begin
-       plan_inv(::$Plan{T,inplace}) where {T,inplace} = $iPlan{T,inplace}()
-       plan_inv(::$iPlan{T,inplace}) where {T,inplace} = $Plan{T,inplace}()
+       plan_inv(p::$Plan{T,inplace,G}) where {T,inplace,G} = $iPlan{T,inplace,G}(p.region)
+       plan_inv(p::$iPlan{T,inplace,G}) where {T,inplace,G} = $Plan{T,inplace,G}(p.region)
     end
 end
 
 # Specific for rfft, irfft and brfft:
-plan_inv(p::DummyirFFTPlan{T,inplace}) where {T,inplace} = DummyrFFTPlan{T,Inplace}(p.n)
-plan_inv(p::DummyrFFTPlan{T,inplace}) where {T,inplace} = DummyirFFTPlan{T,Inplace}(p.n)
+plan_inv(p::DummyirFFTPlan{T,inplace,G}) where {T,inplace,G} = DummyrFFTPlan{T,inplace,G}(p.n, p.region) # was `Inplace`, an unbound name
+plan_inv(p::DummyrFFTPlan{T,inplace,G}) where {T,inplace,G} = DummyirFFTPlan{T,inplace,G}(p.n, p.region) # was `Inplace`, an unbound name
 
 
 
@@ -194,26 +254,26 @@ for (Plan,ff,ff!) in ((:DummyFFTPlan,:generic_fft,:generic_fft!),
                       (:DummyDCTPlan,:generic_dct,:generic_dct!),
                       (:DummyiDCTPlan,:generic_idct,:generic_idct!))
     @eval begin
-        *(p::$Plan{T,true}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = $ff!(x)
-        *(p::$Plan{T,false}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = $ff(x)
+        *(p::$Plan{T,true}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = $ff!(x, p.region)
+        *(p::$Plan{T,false}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = $ff(x, p.region)
         function mul!(C::StridedVector, p::$Plan, x::StridedVector)
-            C[:] = $ff(x)
+            C[:] = $ff(x, p.region)
             C
         end
     end
 end
 
 # Specific for irfft and brfft:
-*(p::DummyirFFTPlan{T,true}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_irfft!(x, p.n)
-*(p::DummyirFFTPlan{T,false}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_irfft(x, p.n)
+*(p::DummyirFFTPlan{T,true}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_irfft!(x, p.n, p.region)
+*(p::DummyirFFTPlan{T,false}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_irfft(x, p.n, p.region)
 function mul!(C::StridedVector, p::DummyirFFTPlan, x::StridedVector)
-    C[:] = generic_irfft(x, p.n)
+    C[:] = generic_irfft(x, p.n, p.region)
     C
 end
-*(p::DummybrFFTPlan{T,true}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_brfft!(x, p.n)
-*(p::DummybrFFTPlan{T,false}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_brfft(x, p.n)
+*(p::DummybrFFTPlan{T,true}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_brfft!(x, p.n, p.region)
+*(p::DummybrFFTPlan{T,false}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_brfft(x, p.n, p.region)
 function mul!(C::StridedVector, p::DummybrFFTPlan, x::StridedVector)
-    C[:] = generic_brfft(x, p.n)
+    C[:] = generic_brfft(x, p.n, p.region)
     C
 end
 
@@ -233,27 +293,27 @@ AbstractFFTs._fftfloat(::Type{T}) where {T <: AbstractFloat} = T
 # This is the reason for using StridedArray below. We also have to carefully
 # distinguish between real and complex arguments.
 
-plan_fft(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyFFTPlan{Complex{real(T)},false}()
-plan_fft!(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyFFTPlan{Complex{real(T)},true}()
+plan_fft(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyFFTPlan{Complex{real(T)},false,typeof(region)}(region)
+plan_fft!(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyFFTPlan{Complex{real(T)},true,typeof(region)}(region)
 
-plan_bfft(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummybFFTPlan{Complex{real(T)},false}()
-plan_bfft!(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummybFFTPlan{Complex{real(T)},true}()
+plan_bfft(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummybFFTPlan{Complex{real(T)},false,typeof(region)}(region)
+plan_bfft!(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummybFFTPlan{Complex{real(T)},true,typeof(region)}(region)
 
 # The ifft plans are automatically provided in terms of the bfft plans above.
-# plan_ifft(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyiFFTPlan{Complex{real(T)},false}()
-# plan_ifft!(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyiFFTPlan{Complex{real(T)},true}()
+# plan_ifft(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyiFFTPlan{Complex{real(T)},false,typeof(region)}(region)
+# plan_ifft!(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyiFFTPlan{Complex{real(T)},true,typeof(region)}(region)
 
-plan_dct(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyDCTPlan{T,false}()
-plan_dct!(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyDCTPlan{T,true}()
+plan_dct(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyDCTPlan{T,false,typeof(region)}(region)
+plan_dct!(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyDCTPlan{T,true,typeof(region)}(region)
 
-plan_idct(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyiDCTPlan{T,false}()
-plan_idct!(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyiDCTPlan{T,true}()
+plan_idct(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyiDCTPlan{T,false,typeof(region)}(region)
+plan_idct!(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyiDCTPlan{T,true,typeof(region)}(region)
 
-plan_rfft(x::StridedArray{T}, region) where {T <: RealFloats} = DummyrFFTPlan{Complex{real(T)},false}(length(x))
-plan_brfft(x::StridedArray{T}, n::Integer, region) where {T <: ComplexFloats} = DummybrFFTPlan{Complex{real(T)},false}(n)
+plan_rfft(x::StridedArray{T}, region) where {T <: RealFloats} = DummyrFFTPlan{Complex{real(T)},false,typeof(region)}(length(x), region)
+plan_brfft(x::StridedArray{T}, n::Integer, region) where {T <: ComplexFloats} = DummybrFFTPlan{Complex{real(T)},false,typeof(region)}(n, region)
 
 # A plan for irfft is created in terms of a plan for brfft.
-# plan_irfft(x::StridedArray{T}, n::Integer, region) where {T <: ComplexFloats} = DummyirFFTPlan{Complex{real(T)},false}(n)
+# plan_irfft(x::StridedArray{T}, n::Integer, region) where {T <: ComplexFloats} = DummyirFFTPlan{Complex{real(T)},false,typeof(region)}(n, region)
 
 # These don't exist for now:
 # plan_rfft!(x::StridedArray{T}) where {T <: RealFloats} = DummyrFFTPlan{Complex{real(T)},true}()

From ef06b369217087e6a43f9f6bde3945aa861e8301 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 8 Feb 2021 11:35:19 -0600
Subject: [PATCH 063/222] some rearrangements

close the longest open pull request? #89, fixing #67

also, stop the type piracy of DSP's conv for certain bitstype floating-point types
---
 src/FastTransforms.jl         | 50 +++++++++++++++++++----------------
 src/fftBigFloat.jl            | 27 ++++++++++++-------
 src/specialfunctions.jl       |  2 ++
 test/fftBigFloattests.jl      |  8 +++++-
 test/gaunttests.jl            |  2 +-
 test/nuffttests.jl            |  4 +--
 test/quadraturetests.jl       |  2 +-
 test/specialfunctionstests.jl |  2 +-
 8 files changed, 58 insertions(+), 39 deletions(-)

diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 3a382be0..40c7bca1 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,24 +1,23 @@
 module FastTransforms
 
-using FastGaussQuadrature, LinearAlgebra
-using Reexport, SpecialFunctions, ToeplitzMatrices, FillArrays, ArrayLayouts
+using ArrayLayouts, FastGaussQuadrature, FillArrays, LinearAlgebra,
+      Reexport, SpecialFunctions, ToeplitzMatrices
 
 import DSP
 
 @reexport using AbstractFFTs
 @reexport using FFTW
 
-import Base: unsafe_convert, eltype, ndims, adjoint, transpose, show, *, \,
-             inv, length, size, view, getindex, convert
+import Base: convert, unsafe_convert, eltype, ndims, adjoint, transpose, show,
+             *, \, inv, length, size, view, getindex
 
 import Base.GMP: Limb
 
 import AbstractFFTs: Plan, ScaledPlan,
-                     fft, ifft, bfft, fft!, ifft!, bfft!,
-                     plan_fft, plan_ifft, plan_bfft, plan_fft!, plan_ifft!, plan_bfft!,
-                     rfft, irfft, brfft, plan_rfft, plan_irfft, plan_brfft,
-                     fftshift, ifftshift,
-                     rfft_output_size, brfft_output_size,
+                     fft, ifft, bfft, fft!, ifft!, bfft!, rfft, irfft, brfft,
+                     plan_fft, plan_ifft, plan_bfft, plan_fft!, plan_ifft!,
+                     plan_bfft!, plan_rfft, plan_irfft, plan_brfft,
+                     fftshift, ifftshift, rfft_output_size, brfft_output_size,
                      plan_inv, normalization
 
 import DSP: conv
@@ -53,28 +52,37 @@ include("clenshaw.jl")
 
 include("libfasttransforms.jl")
 
-export plan_nufft, plan_nufft1, plan_nufft2, plan_nufft3, plan_inufft1, plan_inufft2
 export nufft, nufft1, nufft2, nufft3, inufft1, inufft2
 
+export plan_nufft, plan_nufft1, plan_nufft2, plan_nufft3,
+       plan_inufft1, plan_inufft2
+
 include("nufft.jl")
 include("inufft.jl")
 
 export paduatransform, ipaduatransform, paduatransform!, ipaduatransform!,
-       paduapoints, plan_paduatransform!, plan_ipaduatransform!
+       paduapoints
+
+export plan_paduatransform!, plan_ipaduatransform!
 
 include("PaduaTransform.jl")
 
-export plan_chebyshevtransform, plan_ichebyshevtransform, plan_chebyshevtransform!, plan_ichebyshevtransform!,
-            chebyshevtransform, ichebyshevtransform, chebyshevpoints,
-            plan_chebyshevutransform, plan_ichebyshevutransform, plan_chebyshevutransform!, plan_ichebyshevutransform!,
-            chebyshevutransform, ichebyshevutransform,
-            chebyshevtransform!, ichebyshevtransform!, chebyshevutransform!, ichebyshevutransform!
+export chebyshevtransform, ichebyshevtransform,
+       chebyshevtransform!, ichebyshevtransform!,
+       chebyshevutransform, ichebyshevutransform,
+       chebyshevutransform!, ichebyshevutransform!, chebyshevpoints
+
+export plan_chebyshevtransform, plan_ichebyshevtransform,
+       plan_chebyshevtransform!, plan_ichebyshevtransform!,
+       plan_chebyshevutransform, plan_ichebyshevutransform,
+       plan_chebyshevutransform!, plan_ichebyshevutransform!
 
 include("chebyshevtransform.jl")
 
-export plan_clenshawcurtis, clenshawcurtisnodes, clenshawcurtisweights
-export plan_fejer1, fejernodes1, fejerweights1,
-       plan_fejer2, fejernodes2, fejerweights2
+export clenshawcurtisnodes, clenshawcurtisweights, fejernodes1, fejerweights1,
+       fejernodes2, fejerweights2
+
+export plan_clenshawcurtis, plan_fejer1, plan_fejer2
 
 include("clenshawcurtis.jl")
 include("fejer.jl")
@@ -97,10 +105,6 @@ export sphones, sphzeros, sphrand, sphrandn, sphevaluate,
        tetones, tetzeros, tetrand, tetrandn,
        spinsphones, spinsphzeros, spinsphrand, spinsphrandn
 
-lgamma(x) = logabsgamma(x)[1]
-
 include("specialfunctions.jl")
 
-
-
 end # module
diff --git a/src/fftBigFloat.jl b/src/fftBigFloat.jl
index 169d7be3..64b7807e 100644
--- a/src/fftBigFloat.jl
+++ b/src/fftBigFloat.jl
@@ -51,7 +51,7 @@ function generic_fft(x::Vector{T}) where T<:AbstractFloats
     ks = range(zero(real(T)),stop=n-one(real(T)),length=n)
     Wks = exp.((-im).*convert(T,π).*ks.^2 ./ n)
     xq, wq = x.*Wks, conj([exp(-im*convert(T,π)*n);reverse(Wks);Wks[2:end]])
-    return Wks.*conv(xq,wq)[n+1:2n]
+    return Wks.*_conv!(xq,wq)[n+1:2n]
 end
 
 generic_bfft(x::StridedArray{T, N}, region) where {T <: AbstractFloats, N} = conj!(generic_fft(conj(x), region))
@@ -69,16 +69,23 @@ function generic_irfft(v::Vector{T}, n::Integer, region) where T<:ComplexFloats
 end
 generic_brfft(v::StridedArray, n::Integer, region) = generic_irfft(v, n, region)*n
 
-function conv(u::StridedVector{T}, v::StridedVector{T}) where T<:AbstractFloats
-    nu,nv = length(u),length(v)
+function _conv!(u::StridedVector{T}, v::StridedVector{T}) where T<:AbstractFloats
+    nu = length(u)
+    nv = length(v)
     n = nu + nv - 1
-    np2 = nextpow(2,n)
-    append!(u,zeros(T,np2-nu)),append!(v,zeros(T,np2-nv))
+    np2 = nextpow(2, n)
+    append!(u, zeros(T, np2-nu))
+    append!(v, zeros(T, np2-nv))
     y = generic_ifft_pow2(generic_fft_pow2(u).*generic_fft_pow2(v))
     #TODO This would not handle Dual/ComplexDual numbers correctly
     y = T<:Real ? real(y[1:n]) : y[1:n]
 end
 
+conv(u::AbstractArray{T, N}, v::AbstractArray{T, N}) where {T<:AbstractFloat, N} = _conv!(deepcopy(u), deepcopy(v))
+conv(u::AbstractArray{T, N}, v::AbstractArray{Complex{T}, N}) where {T<:AbstractFloat, N} = _conv!(complex(deepcopy(u)), deepcopy(v))
+conv(u::AbstractArray{Complex{T}, N}, v::AbstractArray{T, N}) where {T<:AbstractFloat, N} = _conv!(deepcopy(u), complex(deepcopy(v)))
+conv(u::AbstractArray{Complex{T}, N}, v::AbstractArray{Complex{T}, N}) where {T<:AbstractFloat, N} = _conv!(deepcopy(u), deepcopy(v))
+
 # This is a Cooley-Tukey FFT algorithm inspired by many widely available algorithms including:
 # c_radix2.c in the GNU Scientific Library and four1 in the Numerical Recipes in C.
 # However, the trigonometric recurrence is improved for greater efficiency.
@@ -120,16 +127,16 @@ function generic_fft_pow2!(x::Vector{T}) where T<:AbstractFloat
 end
 
 function generic_fft_pow2(x::Vector{Complex{T}}) where T<:AbstractFloat
-    y = interlace(real(x),imag(x))
+    y = interlace(real(x), imag(x))
     generic_fft_pow2!(y)
-    return complex.(y[1:2:end],y[2:2:end])
+    return complex.(y[1:2:end], y[2:2:end])
 end
-generic_fft_pow2(x::Vector{T}) where {T<:AbstractFloat} = generic_fft_pow2(complex(x))
+generic_fft_pow2(x::Vector{T}) where T<:AbstractFloat = generic_fft_pow2(complex(x))
 
 function generic_ifft_pow2(x::Vector{Complex{T}}) where T<:AbstractFloat
-    y = interlace(real(x),-imag(x))
+    y = interlace(real(x), -imag(x))
     generic_fft_pow2!(y)
-    return complex.(y[1:2:end],-y[2:2:end])/length(x)
+    return ldiv!(length(x), conj!(complex.(y[1:2:end], y[2:2:end])))
 end
 
 function generic_dct(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
diff --git a/src/specialfunctions.jl b/src/specialfunctions.jl
index f50637a1..62b47b16 100644
--- a/src/specialfunctions.jl
+++ b/src/specialfunctions.jl
@@ -56,6 +56,8 @@ function pochhammer(x::Number,n::UnitRange{T}) where T<:Real
     ret
 end
 
+lgamma(x) = logabsgamma(x)[1]
+
 ogamma(x::Number) = (isinteger(x) && x<0) ? zero(float(x)) : inv(gamma(x))
 
 """
diff --git a/test/fftBigFloattests.jl b/test/fftBigFloattests.jl
index d1f5a1fe..2dfd2988 100644
--- a/test/fftBigFloattests.jl
+++ b/test/fftBigFloattests.jl
@@ -1,4 +1,4 @@
-using FastTransforms, FFTW, Test
+using DSP, FFTW, FastTransforms, LinearAlgebra, Test
 
 @testset "BigFloat FFT and DCT" begin
 
@@ -9,6 +9,12 @@ using FastTransforms, FFTW, Test
     c = collect(range(-big(1.0),stop=1.0,length=201))
     @test norm(ifft(fft(c))-c) < 200norm(c)eps(BigFloat)
 
+    s = big(1) ./ (1:10)
+    s64 = Float64.(s)
+    @test Float64.(conv(s, s)) ≈ conv(s64, s64)
+    @test s == big(1) ./ (1:10) #67, ensure conv doesn't overwrite input
+    @test all(s64 .=== Float64.(big(1) ./ (1:10)))
+
     p = plan_dct(c)
     @test norm(FastTransforms.generic_dct(c) - p*c) == 0
 
diff --git a/test/gaunttests.jl b/test/gaunttests.jl
index d1f4699b..5f194eff 100644
--- a/test/gaunttests.jl
+++ b/test/gaunttests.jl
@@ -1,4 +1,4 @@
-using FastTransforms, Test
+using FastTransforms, LinearAlgebra, Test
 
 import FastTransforms: δ
 
diff --git a/test/nuffttests.jl b/test/nuffttests.jl
index 36fd6318..ee3c4689 100644
--- a/test/nuffttests.jl
+++ b/test/nuffttests.jl
@@ -1,4 +1,4 @@
-using FastTransforms, Test, FFTW
+using FFTW, FastTransforms, LinearAlgebra, Test
 
 FFTW.set_num_threads(ceil(Int, Sys.CPU_THREADS/2))
 
@@ -75,7 +75,7 @@ FFTW.set_num_threads(ceil(Int, Sys.CPU_THREADS/2))
         fftc = fft(c)
         if Sys.WORD_SIZE == 64
             @test_skip norm(nufft1(c, ω, ϵ) - fftc) == 0 # skip because fftw3 seems to change this
-            @test_skip norm(nufft2(c, x, ϵ) - fftc) == 0 # skip because fftw3 seems to change this
+            @test norm(nufft2(c, x, ϵ) - fftc) == 0
             @test_skip norm(nufft3(c, x, ω, ϵ) - fftc) == 0 # skip because fftw3 seems to change this
         end
         err_bnd = 500*eps(Float64)*norm(c)
diff --git a/test/quadraturetests.jl b/test/quadraturetests.jl
index 7956d7f0..225c7275 100644
--- a/test/quadraturetests.jl
+++ b/test/quadraturetests.jl
@@ -1,4 +1,4 @@
-using FastTransforms, Test
+using FastTransforms, LinearAlgebra, Test
 
 import FastTransforms: chebyshevmoments1, chebyshevmoments2,
                        chebyshevjacobimoments1, chebyshevjacobimoments2,
diff --git a/test/specialfunctionstests.jl b/test/specialfunctionstests.jl
index 014958a3..febad808 100644
--- a/test/specialfunctionstests.jl
+++ b/test/specialfunctionstests.jl
@@ -1,4 +1,4 @@
-using FastTransforms, Test
+using FastTransforms, LinearAlgebra, Test
 
 import FastTransforms: pochhammer, sqrtpi, SpecialFunctions.gamma
 import FastTransforms: Cnλ, Λ, lambertw, Cnαβ, Anαβ

From c3da771761ddfb061eb49c0c12c2b5d7bebd5a65 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Tue, 9 Feb 2021 15:37:51 -0600
Subject: [PATCH 064/222] start 2D adjoint plans

tests will fail unless built from source.
---
 src/libfasttransforms.jl | 117 ++++++++++++++++++++++++++++++---------
 1 file changed, 92 insertions(+), 25 deletions(-)

diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index bc8cd3fc..7a09e6d0 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -248,8 +248,7 @@ destroy_plan(p::FTPlan{Float64, 1}) = ccall((:ft_destroy_tb_eigen_FMM, libfasttr
 destroy_plan(p::FTPlan{BigFloat, 1}) = ccall((:ft_mpfr_destroy_plan, libfasttransforms), Cvoid, (Ptr{mpfr_t}, Cint), p, p.n)
 destroy_plan(p::FTPlan{Float32, 1, ASSOCIATEDJAC2JAC}) = ccall((:ft_destroy_btb_eigen_FMMf, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 1, ASSOCIATEDJAC2JAC}) = ccall((:ft_destroy_btb_eigen_FMM, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
-destroy_plan(p::FTPlan{Float64, 2}) = ccall((:ft_destroy_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
-destroy_plan(p::FTPlan{Float64, 3}) = ccall((:ft_destroy_tetrahedral_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float64}) = ccall((:ft_destroy_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHERE}) = ccall((:ft_destroy_spin_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, SPHERESYNTHESIS}) = ccall((:ft_destroy_sphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, SPHEREANALYSIS}) = ccall((:ft_destroy_sphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
@@ -267,47 +266,63 @@ destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHERESYNTHESIS}) = ccall((:ft_d
 destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHEREANALYSIS}) = ccall((:ft_destroy_spinsphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, SPHERICALISOMETRY}) = ccall((:ft_destroy_sph_isometry_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 
-struct AdjointFTPlan{T, S}
+struct AdjointFTPlan{T, S, R}
     parent::S
+    adjoint::R
+    function AdjointFTPlan{T, S, R}(parent::S) where {T, S, R}
+        new(parent)
+    end
+    function AdjointFTPlan{T, S, R}(parent::S, adjoint::R) where {T, S, R}
+        new(parent, adjoint)
+    end
 end
 
-AdjointFTPlan(p::FTPlan) = AdjointFTPlan{eltype(p), typeof(p)}(p)
+AdjointFTPlan(p::FTPlan) = AdjointFTPlan{eltype(p), typeof(p), typeof(p)}(p)
+AdjointFTPlan(p::FTPlan, q::FTPlan) = AdjointFTPlan{eltype(q), typeof(p), typeof(q)}(p, q)
 
 adjoint(p::FTPlan) = AdjointFTPlan(p)
 adjoint(p::AdjointFTPlan) = p.parent
 
-eltype(p::AdjointFTPlan{T, S}) where {T, S} = T
-ndims(p::AdjointFTPlan{T, S}) where {T, S} = ndims(p.parent)
-function show(io::IO, p::AdjointFTPlan{T, S}) where {T, S}
+eltype(p::AdjointFTPlan{T}) where T = T
+ndims(p::AdjointFTPlan) = ndims(p.parent)
+function show(io::IO, p::AdjointFTPlan)
     print(io, "Adjoint ")
     show(io, p.parent)
 end
 
 checksize(p::AdjointFTPlan, x) = checksize(p.parent, x)
 
-unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::AdjointFTPlan{T, FTPlan{T, N, K}}) where {T, N, K} = unsafe_convert(Ptr{ft_plan_struct}, p.parent)
-unsafe_convert(::Type{Ptr{mpfr_t}}, p::AdjointFTPlan{T, FTPlan{T, N, K}}) where {T, N, K} = unsafe_convert(Ptr{mpfr_t}, p.parent)
+unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::AdjointFTPlan) = unsafe_convert(Ptr{ft_plan_struct}, p.parent)
+unsafe_convert(::Type{Ptr{mpfr_t}}, p::AdjointFTPlan) = unsafe_convert(Ptr{mpfr_t}, p.parent)
 
-struct TransposeFTPlan{T, S}
+struct TransposeFTPlan{T, S, R}
     parent::S
+    transpose::R
+    function TransposeFTPlan{T, S, R}(parent::S) where {T, S, R}
+        new(parent)
+    end
+    function TransposeFTPlan{T, S, R}(parent::S, transpose::R) where {T, S, R}
+        new(parent, transpose)
+    end
 end
 
-TransposeFTPlan(p::FTPlan) = TransposeFTPlan{eltype(p), typeof(p)}(p)
+TransposeFTPlan(p::FTPlan) = TransposeFTPlan{eltype(p), typeof(p), typeof(p)}(p)
+TransposeFTPlan(p::FTPlan, q::FTPlan) = TransposeFTPlan{eltype(q), typeof(p), typeof(q)}(p, q)
 
 transpose(p::FTPlan) = TransposeFTPlan(p)
 transpose(p::TransposeFTPlan) = p.parent
 
-eltype(p::TransposeFTPlan{T, S}) where {T, S} = T
-ndims(p::TransposeFTPlan{T, S}) where {T, S} = ndims(p.parent)
-function show(io::IO, p::TransposeFTPlan{T, S}) where {T, S}
+eltype(p::TransposeFTPlan{T}) where T = T
+ndims(p::TransposeFTPlan) = ndims(p.parent)
+function show(io::IO, p::TransposeFTPlan)
     print(io, "Transpose ")
     show(io, p.parent)
 end
 
 checksize(p::TransposeFTPlan, x) = checksize(p.parent, x)
 
-unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::TransposeFTPlan{T, FTPlan{T, N, K}}) where {T, N, K} = unsafe_convert(Ptr{ft_plan_struct}, p.parent)
-unsafe_convert(::Type{Ptr{mpfr_t}}, p::TransposeFTPlan{T, FTPlan{T, N, K}}) where {T, N, K} = unsafe_convert(Ptr{mpfr_t}, p.parent)
+unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::TransposeFTPlan) = unsafe_convert(Ptr{ft_plan_struct}, p.parent)
+unsafe_convert(::Type{Ptr{mpfr_t}}, p::TransposeFTPlan) = unsafe_convert(Ptr{mpfr_t}, p.parent)
 
 for f in (:leg2cheb, :cheb2leg, :ultra2ultra, :jac2jac,
           :lag2lag, :jac2ultra, :ultra2jac, :jac2cheb,
@@ -547,11 +562,44 @@ function plan_spinsph2fourier(::Type{Complex{Float64}}, n::Integer, s::Integer)
     return FTPlan{Complex{Float64}, 2, SPINSPHERE}(plan, n)
 end
 
-for (fJ, fC, fE, K) in ((:plan_sph_synthesis, :ft_plan_sph_synthesis, :ft_execute_sph_synthesis, SPHERESYNTHESIS),
-                    (:plan_sph_analysis, :ft_plan_sph_analysis, :ft_execute_sph_analysis, SPHEREANALYSIS),
-                    (:plan_sphv_synthesis, :ft_plan_sphv_synthesis, :ft_execute_sphv_synthesis, SPHEREVSYNTHESIS),
-                    (:plan_sphv_analysis, :ft_plan_sphv_analysis, :ft_execute_sphv_analysis, SPHEREVANALYSIS),
-                    (:plan_disk_synthesis, :ft_plan_disk_synthesis, :ft_execute_disk_synthesis, DISKSYNTHESIS),
+for (fJ, fadJ, fC, fE, K) in ((:plan_sph_synthesis, :plan_sph_analysis, :ft_plan_sph_synthesis, :ft_execute_sph_synthesis, SPHERESYNTHESIS),
+                    (:plan_sph_analysis, :plan_sph_synthesis, :ft_plan_sph_analysis, :ft_execute_sph_analysis, SPHEREANALYSIS),
+                    (:plan_sphv_synthesis, :plan_sphv_analysis, :ft_plan_sphv_synthesis, :ft_execute_sphv_synthesis, SPHEREVSYNTHESIS),
+                    (:plan_sphv_analysis, :plan_sphv_synthesis, :ft_plan_sphv_analysis, :ft_execute_sphv_analysis, SPHEREVANALYSIS))
+    @eval begin
+        $fJ(x::Matrix{T}) where T = $fJ(T, size(x, 1), size(x, 2))
+        $fJ(::Type{Complex{T}}, x...) where T <: Real = $fJ(T, x...)
+        function $fJ(::Type{Float64}, n::Integer, m::Integer)
+            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint), n, m)
+            return FTPlan{Float64, 2, $K}(plan, n, m)
+        end
+        adjoint(p::FTPlan{T, 2, $K}) where T = AdjointFTPlan(p, $fadJ(T, p.n, p.m))
+        transpose(p::FTPlan{T, 2, $K}) where T = TransposeFTPlan(p, $fadJ(T, p.n, p.m))
+        function lmul!(p::FTPlan{Float64, 2, $K}, x::Matrix{Float64})
+            if p.n != size(x, 1) || p.m != size(x, 2)
+                throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
+            end
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function lmul!(p::AdjointFTPlan{Float64, FTPlan{Float64, 2, $K}}, x::Matrix{Float64})
+            if p.adjoint.n != size(x, 1) || p.adjoint.m != size(x, 2)
+                throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
+            end
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p.adjoint, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function lmul!(p::TransposeFTPlan{Float64, FTPlan{Float64, 2, $K}}, x::Matrix{Float64})
+            if p.transpose.n != size(x, 1) || p.transpose.m != size(x, 2)
+                throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
+            end
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p.transpose, x, size(x, 1), size(x, 2))
+            return x
+        end
+    end
+end
+
+for (fJ, fC, fE, K) in ((:plan_disk_synthesis, :ft_plan_disk_synthesis, :ft_execute_disk_synthesis, DISKSYNTHESIS),
                     (:plan_disk_analysis, :ft_plan_disk_analysis, :ft_execute_disk_analysis, DISKANALYSIS),
                     (:plan_rectdisk_synthesis, :ft_plan_rectdisk_synthesis, :ft_execute_rectdisk_synthesis, RECTDISKSYNTHESIS),
                     (:plan_rectdisk_analysis, :ft_plan_rectdisk_analysis, :ft_execute_rectdisk_analysis, RECTDISKANALYSIS),
@@ -788,8 +836,27 @@ end
 for (fJ, fC, K) in ((:lmul!, :ft_execute_sph2fourier, SPHERE),
                     (:ldiv!, :ft_execute_fourier2sph, SPHERE),
                     (:lmul!, :ft_execute_sphv2fourier, SPHEREV),
-                    (:ldiv!, :ft_execute_fourier2sphv, SPHEREV),
-                    (:lmul!, :ft_execute_disk2cxf, DISK),
+                    (:ldiv!, :ft_execute_fourier2sphv, SPHEREV))
+    @eval begin
+        function $fJ(p::FTPlan{Float64, 2, $K}, x::Matrix{Float64})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function $fJ(p::AdjointFTPlan{Float64, FTPlan{Float64, 2, $K}}, x::Matrix{Float64})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function $fJ(p::TransposeFTPlan{Float64, FTPlan{Float64, 2, $K}}, x::Matrix{Float64})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+    end
+end
+
+for (fJ, fC, K) in ((:lmul!, :ft_execute_disk2cxf, DISK),
                     (:ldiv!, :ft_execute_cxf2disk, DISK),
                     (:lmul!, :ft_execute_rectdisk2cheb, RECTDISK),
                     (:ldiv!, :ft_execute_cheb2rectdisk, RECTDISK),
@@ -896,11 +963,11 @@ for fJ in (:lmul!, :ldiv!)
             x .= complex.($fJ(p, real(x)), $fJ(p, imag(x)))
             return x
         end
-        function $fJ(p::AdjointFTPlan{T, FTPlan{T, N, K}}, x::AbstractArray{Complex{T}}) where {T, N, K}
+        function $fJ(p::AdjointFTPlan{T}, x::AbstractArray{Complex{T}}) where T
             x .= complex.($fJ(p, real(x)), $fJ(p, imag(x)))
             return x
         end
-        function $fJ(p::TransposeFTPlan{T, FTPlan{T, N, K}}, x::AbstractArray{Complex{T}}) where {T, N, K}
+        function $fJ(p::TransposeFTPlan{T}, x::AbstractArray{Complex{T}}) where T
             x .= complex.($fJ(p, real(x)), $fJ(p, imag(x)))
             return x
         end

From 3cd41a27e251308ef1dd68cc0d29eb673efb7451 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Tue, 9 Feb 2021 22:01:28 -0600
Subject: [PATCH 065/222] add Adjoint and Transpose to all the harmonic plans

TODO: add Adjoint and Transpose to the accompanying synthesis and analysis plans
---
 src/libfasttransforms.jl | 74 +++++++++++++++++-----------------------
 1 file changed, 32 insertions(+), 42 deletions(-)

diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 7a09e6d0..0eb6c395 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -833,68 +833,58 @@ for (fJ, fC) in ((:lmul!, :ft_mpfr_trmm_ptr),
     end
 end
 
-for (fJ, fC, K) in ((:lmul!, :ft_execute_sph2fourier, SPHERE),
-                    (:ldiv!, :ft_execute_fourier2sph, SPHERE),
-                    (:lmul!, :ft_execute_sphv2fourier, SPHEREV),
-                    (:ldiv!, :ft_execute_fourier2sphv, SPHEREV))
+for (fJ, fC, T, N, K) in ((:lmul!, :ft_execute_sph2fourier, Float64, 2, SPHERE),
+                    (:ldiv!, :ft_execute_fourier2sph, Float64, 2, SPHERE),
+                    (:lmul!, :ft_execute_sphv2fourier, Float64, 2, SPHEREV),
+                    (:ldiv!, :ft_execute_fourier2sphv, Float64, 2, SPHEREV),
+                    (:lmul!, :ft_execute_spinsph2fourier, Complex{Float64}, 2, SPINSPHERE),
+                    (:ldiv!, :ft_execute_fourier2spinsph, Complex{Float64}, 2, SPINSPHERE),
+                    (:lmul!, :ft_execute_disk2cxf, Float64, 2, DISK),
+                    (:ldiv!, :ft_execute_cxf2disk, Float64, 2, DISK),
+                    (:lmul!, :ft_execute_rectdisk2cheb, Float64, 2, RECTDISK),
+                    (:ldiv!, :ft_execute_cheb2rectdisk, Float64, 2, RECTDISK),
+                    (:lmul!, :ft_execute_tri2cheb, Float64, 2, TRIANGLE),
+                    (:ldiv!, :ft_execute_cheb2tri, Float64, 2, TRIANGLE))
     @eval begin
-        function $fJ(p::FTPlan{Float64, 2, $K}, x::Matrix{Float64})
+        function $fJ(p::FTPlan{$T, $N, $K}, x::Array{$T, $N})
             checksize(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$T}, Cint, Cint), 'N', p, x, size(x)...)
             return x
         end
-        function $fJ(p::AdjointFTPlan{Float64, FTPlan{Float64, 2, $K}}, x::Matrix{Float64})
+        function $fJ(p::AdjointFTPlan{$T, FTPlan{$T, $N, $K}}, x::Array{$T, $N})
             checksize(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$T}, Cint, Cint), 'T', p, x, size(x)...)
             return x
         end
-        function $fJ(p::TransposeFTPlan{Float64, FTPlan{Float64, 2, $K}}, x::Matrix{Float64})
+        function $fJ(p::TransposeFTPlan{$T, FTPlan{$T, $N, $K}}, x::Array{$T, $N})
             checksize(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$T}, Cint, Cint), 'T', p, x, size(x)...)
             return x
         end
     end
 end
 
-for (fJ, fC, K) in ((:lmul!, :ft_execute_disk2cxf, DISK),
-                    (:ldiv!, :ft_execute_cxf2disk, DISK),
-                    (:lmul!, :ft_execute_rectdisk2cheb, RECTDISK),
-                    (:ldiv!, :ft_execute_cheb2rectdisk, RECTDISK),
-                    (:lmul!, :ft_execute_tri2cheb, TRIANGLE),
-                    (:ldiv!, :ft_execute_cheb2tri, TRIANGLE))
+for (fJ, fC) in ((:lmul!, :ft_execute_tet2cheb),
+                 (:ldiv!, :ft_execute_cheb2tet))
     @eval begin
-        function $fJ(p::FTPlan{Float64, 2, $K}, x::Matrix{Float64})
+        function $fJ(p::FTPlan{Float64, 3, TETRAHEDRON}, x::Array{Float64, 3})
             checksize(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), p, x, size(x, 1), size(x, 2))
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'N', p, x, size(x)...)
+            return x
+        end
+        function $fJ(p::AdjointFTPlan{Float64, FTPlan{Float64, 3, TETRAHEDRON}}, x::Array{Float64, 3})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'T', p, x, size(x)...)
+            return x
+        end
+        function $fJ(p::TransposeFTPlan{Float64, FTPlan{Float64, 3, TETRAHEDRON}}, x::Array{Float64, 3})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'T', p, x, size(x)...)
             return x
         end
     end
 end
 
-function lmul!(p::FTPlan{Float64, 3, TETRAHEDRON}, x::Array{Float64, 3})
-    checksize(p, x)
-    ccall((:ft_execute_tet2cheb, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), p, x, size(x, 1), size(x, 2), size(x, 3))
-    return x
-end
-
-function ldiv!(p::FTPlan{Float64, 3, TETRAHEDRON}, x::Array{Float64, 3})
-    checksize(p, x)
-    ccall((:ft_execute_cheb2tet, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), p, x, size(x, 1), size(x, 2), size(x, 3))
-    return x
-end
-
-function lmul!(p::FTPlan{Complex{Float64}, 2, SPINSPHERE}, x::Matrix{Complex{Float64}})
-    checksize(p, x)
-    ccall((:ft_execute_spinsph2fourier, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Complex{Float64}}, Cint, Cint), p, x, size(x, 1), size(x, 2))
-    return x
-end
-
-function ldiv!(p::FTPlan{Complex{Float64}, 2, SPINSPHERE}, x::Matrix{Complex{Float64}})
-    checksize(p, x)
-    ccall((:ft_execute_fourier2spinsph, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Complex{Float64}}, Cint, Cint), p, x, size(x, 1), size(x, 2))
-    return x
-end
-
 function execute_sph_polar_rotation!(x::Matrix{Float64}, α)
     ccall((:ft_execute_sph_polar_rotation, libfasttransforms), Cvoid, (Ptr{Float64}, Cint, Cint, Float64, Float64), x, size(x, 1), size(x, 2), sin(α), cos(α))
     return x

From 4a7e2c89cbb2606d44e50228046b940689b6a6b9 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Tue, 9 Feb 2021 22:06:22 -0600
Subject: [PATCH 066/222] Whoops! Can't splat in a ccall

---
 src/libfasttransforms.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 0eb6c395..91eb601b 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -848,17 +848,17 @@ for (fJ, fC, T, N, K) in ((:lmul!, :ft_execute_sph2fourier, Float64, 2, SPHERE),
     @eval begin
         function $fJ(p::FTPlan{$T, $N, $K}, x::Array{$T, $N})
             checksize(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$T}, Cint, Cint), 'N', p, x, size(x)...)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$T}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
             return x
         end
         function $fJ(p::AdjointFTPlan{$T, FTPlan{$T, $N, $K}}, x::Array{$T, $N})
             checksize(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$T}, Cint, Cint), 'T', p, x, size(x)...)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$T}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
             return x
         end
         function $fJ(p::TransposeFTPlan{$T, FTPlan{$T, $N, $K}}, x::Array{$T, $N})
             checksize(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$T}, Cint, Cint), 'T', p, x, size(x)...)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$T}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
             return x
         end
     end
@@ -869,17 +869,17 @@ for (fJ, fC) in ((:lmul!, :ft_execute_tet2cheb),
     @eval begin
         function $fJ(p::FTPlan{Float64, 3, TETRAHEDRON}, x::Array{Float64, 3})
             checksize(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'N', p, x, size(x)...)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2), size(x, 3))
             return x
         end
         function $fJ(p::AdjointFTPlan{Float64, FTPlan{Float64, 3, TETRAHEDRON}}, x::Array{Float64, 3})
             checksize(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'T', p, x, size(x)...)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2), size(x, 3))
             return x
         end
         function $fJ(p::TransposeFTPlan{Float64, FTPlan{Float64, 3, TETRAHEDRON}}, x::Array{Float64, 3})
             checksize(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'T', p, x, size(x)...)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2), size(x, 3))
             return x
         end
     end

From 17be0ef9f62057ec2dccb7edf85ca66eda4d3463 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 10 Feb 2021 12:50:04 -0600
Subject: [PATCH 067/222] polish off the rest of the adjoints and transposes

---
 src/libfasttransforms.jl       | 245 +++++++++++++++++++--------------
 test/libfasttransformstests.jl |   7 +
 2 files changed, 146 insertions(+), 106 deletions(-)

diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 91eb601b..0fdf681d 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -217,12 +217,20 @@ show(io::IO, p::FTPlan{T, 2, K}) where {T, K} = print(io, "FastTransforms plan f
 show(io::IO, p::FTPlan{T, 3, K}) where {T, K} = print(io, "FastTransforms plan for ", kind2string(K), " for $(p.n)×$(p.l)×$(p.m)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, SPHERICALISOMETRY}) where T = print(io, "FastTransforms ", kind2string(SPHERICALISOMETRY), " plan for $(p.n)×$(2p.n-1)-element array of ", T)
 
-function checksize(p::FTPlan{T}, x::Array{T}) where T
+function checksize(p::FTPlan{T, 1}, x::Array{T}) where T
     if p.n != size(x, 1)
         throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.n), x has leading dimension $(size(x, 1))"))
     end
 end
 
+for (N, K) in ((2, RECTDISK), (2, TRIANGLE), (3, TETRAHEDRON))
+    @eval function checksize(p::FTPlan{T, $N, $K}, x::Array{T, $N}) where T
+        if p.n != size(x, 1)
+            throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.n), x has leading dimension $(size(x, 1))"))
+        end
+    end
+end
+
 for K in (SPHERE, SPHEREV, DISK, SPINSPHERE)
     @eval function checksize(p::FTPlan{T, 2, $K}, x::Matrix{T}) where T
         if p.n != size(x, 1)
@@ -234,6 +242,18 @@ for K in (SPHERE, SPHEREV, DISK, SPINSPHERE)
     end
 end
 
+function checksize(p::FTPlan{T, 2}, x::Array{T, 2}) where T
+    if p.n != size(x, 1) || p.m != size(x, 2)
+        throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
+    end
+end
+
+function checksize(p::FTPlan{T, 3}, x::Array{T, 3}) where T
+    if p.n != size(x, 1) || p.l != size(x, 2) || p.m != size(x, 3)
+        throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.l) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2)) × $(size(x, 3))"))
+    end
+end
+
 function checksize(p::FTPlan{T, 2, SPHERICALISOMETRY}, x::Matrix{T}) where T
     if p.n != size(x, 1) || 2p.n-1 != size(x, 2)
         throw(DimensionMismatch("This FTPlan must operate on arrays of size $(p.n) × $(2p.n-1)."))
@@ -290,10 +310,29 @@ function show(io::IO, p::AdjointFTPlan)
     show(io, p.parent)
 end
 
-checksize(p::AdjointFTPlan, x) = checksize(p.parent, x)
+function checksize(p::AdjointFTPlan, x)
+    try
+        checksize(p.adjoint, x)
+    catch
+        checksize(p.parent, x)
+    end
+end
 
-unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::AdjointFTPlan) = unsafe_convert(Ptr{ft_plan_struct}, p.parent)
-unsafe_convert(::Type{Ptr{mpfr_t}}, p::AdjointFTPlan) = unsafe_convert(Ptr{mpfr_t}, p.parent)
+function unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::AdjointFTPlan)
+    try
+        unsafe_convert(Ptr{ft_plan_struct}, p.adjoint)
+    catch
+        unsafe_convert(Ptr{ft_plan_struct}, p.parent)
+    end
+end
+
+function unsafe_convert(::Type{Ptr{mpfr_t}}, p::AdjointFTPlan)
+    try
+        unsafe_convert(Ptr{mpfr_t}, p.adjoint)
+    catch
+        unsafe_convert(Ptr{mpfr_t}, p.parent)
+    end
+end
 
 struct TransposeFTPlan{T, S, R}
     parent::S
@@ -319,10 +358,29 @@ function show(io::IO, p::TransposeFTPlan)
     show(io, p.parent)
 end
 
-checksize(p::TransposeFTPlan, x) = checksize(p.parent, x)
+function checksize(p::TransposeFTPlan, x)
+    try
+        checksize(p.transpose, x)
+    catch
+        checksize(p.parent, x)
+    end
+end
 
-unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::TransposeFTPlan) = unsafe_convert(Ptr{ft_plan_struct}, p.parent)
-unsafe_convert(::Type{Ptr{mpfr_t}}, p::TransposeFTPlan) = unsafe_convert(Ptr{mpfr_t}, p.parent)
+function unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::TransposeFTPlan)
+    try
+        unsafe_convert(Ptr{ft_plan_struct}, p.transpose)
+    catch
+        unsafe_convert(Ptr{ft_plan_struct}, p.parent)
+    end
+end
+
+function unsafe_convert(::Type{Ptr{mpfr_t}}, p::TransposeFTPlan)
+    try
+        unsafe_convert(Ptr{mpfr_t}, p.transpose)
+    catch
+        unsafe_convert(Ptr{mpfr_t}, p.parent)
+    end
+end
 
 for f in (:leg2cheb, :cheb2leg, :ultra2ultra, :jac2jac,
           :lag2lag, :jac2ultra, :ultra2jac, :jac2cheb,
@@ -563,9 +621,15 @@ function plan_spinsph2fourier(::Type{Complex{Float64}}, n::Integer, s::Integer)
 end
 
 for (fJ, fadJ, fC, fE, K) in ((:plan_sph_synthesis, :plan_sph_analysis, :ft_plan_sph_synthesis, :ft_execute_sph_synthesis, SPHERESYNTHESIS),
-                    (:plan_sph_analysis, :plan_sph_synthesis, :ft_plan_sph_analysis, :ft_execute_sph_analysis, SPHEREANALYSIS),
-                    (:plan_sphv_synthesis, :plan_sphv_analysis, :ft_plan_sphv_synthesis, :ft_execute_sphv_synthesis, SPHEREVSYNTHESIS),
-                    (:plan_sphv_analysis, :plan_sphv_synthesis, :ft_plan_sphv_analysis, :ft_execute_sphv_analysis, SPHEREVANALYSIS))
+                              (:plan_sph_analysis, :plan_sph_synthesis, :ft_plan_sph_analysis, :ft_execute_sph_analysis, SPHEREANALYSIS),
+                              (:plan_sphv_synthesis, :plan_sphv_analysis, :ft_plan_sphv_synthesis, :ft_execute_sphv_synthesis, SPHEREVSYNTHESIS),
+                              (:plan_sphv_analysis, :plan_sphv_synthesis, :ft_plan_sphv_analysis, :ft_execute_sphv_analysis, SPHEREVANALYSIS),
+                              (:plan_disk_synthesis, :plan_disk_analysis, :ft_plan_disk_synthesis, :ft_execute_disk_synthesis, DISKSYNTHESIS),
+                              (:plan_disk_analysis, :plan_disk_synthesis, :ft_plan_disk_analysis, :ft_execute_disk_analysis, DISKANALYSIS),
+                              (:plan_rectdisk_synthesis, :plan_rectdisk_analysis, :ft_plan_rectdisk_synthesis, :ft_execute_rectdisk_synthesis, RECTDISKSYNTHESIS),
+                              (:plan_rectdisk_analysis, :plan_rectdisk_synthesis, :ft_plan_rectdisk_analysis, :ft_execute_rectdisk_analysis, RECTDISKANALYSIS),
+                              (:plan_tri_synthesis, :plan_tri_analysis, :ft_plan_tri_synthesis, :ft_execute_tri_synthesis, TRIANGLESYNTHESIS),
+                              (:plan_tri_analysis, :plan_tri_synthesis, :ft_plan_tri_analysis, :ft_execute_tri_analysis, TRIANGLEANALYSIS))
     @eval begin
         $fJ(x::Matrix{T}) where T = $fJ(T, size(x, 1), size(x, 2))
         $fJ(::Type{Complex{T}}, x...) where T <: Real = $fJ(T, x...)
@@ -576,112 +640,81 @@ for (fJ, fadJ, fC, fE, K) in ((:plan_sph_synthesis, :plan_sph_analysis, :ft_plan
         adjoint(p::FTPlan{T, 2, $K}) where T = AdjointFTPlan(p, $fadJ(T, p.n, p.m))
         transpose(p::FTPlan{T, 2, $K}) where T = TransposeFTPlan(p, $fadJ(T, p.n, p.m))
         function lmul!(p::FTPlan{Float64, 2, $K}, x::Matrix{Float64})
-            if p.n != size(x, 1) || p.m != size(x, 2)
-                throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
-            end
+            checksize(p, x)
             ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
             return x
         end
         function lmul!(p::AdjointFTPlan{Float64, FTPlan{Float64, 2, $K}}, x::Matrix{Float64})
-            if p.adjoint.n != size(x, 1) || p.adjoint.m != size(x, 2)
-                throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
-            end
-            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p.adjoint, x, size(x, 1), size(x, 2))
+            checksize(p, x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
             return x
         end
         function lmul!(p::TransposeFTPlan{Float64, FTPlan{Float64, 2, $K}}, x::Matrix{Float64})
-            if p.transpose.n != size(x, 1) || p.transpose.m != size(x, 2)
-                throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
-            end
-            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p.transpose, x, size(x, 1), size(x, 2))
+            checksize(p, x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
             return x
         end
     end
 end
 
-for (fJ, fC, fE, K) in ((:plan_disk_synthesis, :ft_plan_disk_synthesis, :ft_execute_disk_synthesis, DISKSYNTHESIS),
-                    (:plan_disk_analysis, :ft_plan_disk_analysis, :ft_execute_disk_analysis, DISKANALYSIS),
-                    (:plan_rectdisk_synthesis, :ft_plan_rectdisk_synthesis, :ft_execute_rectdisk_synthesis, RECTDISKSYNTHESIS),
-                    (:plan_rectdisk_analysis, :ft_plan_rectdisk_analysis, :ft_execute_rectdisk_analysis, RECTDISKANALYSIS),
-                    (:plan_tri_synthesis, :ft_plan_tri_synthesis, :ft_execute_tri_synthesis, TRIANGLESYNTHESIS),
-                    (:plan_tri_analysis, :ft_plan_tri_analysis, :ft_execute_tri_analysis, TRIANGLEANALYSIS))
+for (fJ, fadJ, fC, fE, K) in ((:plan_tet_synthesis, :plan_tet_analysis, :ft_plan_tet_synthesis, :ft_execute_tet_synthesis, TETRAHEDRONSYNTHESIS),
+                              (:plan_tet_analysis, :plan_tet_synthesis, :ft_plan_tet_analysis, :ft_execute_tet_analysis, TETRAHEDRONANALYSIS))
     @eval begin
-        $fJ(x::Matrix{T}) where T = $fJ(T, size(x, 1), size(x, 2))
+        $fJ(x::Array{T, 3}) where T = $fJ(T, size(x, 1), size(x, 2), size(x, 3))
         $fJ(::Type{Complex{T}}, x...) where T <: Real = $fJ(T, x...)
-        function $fJ(::Type{Float64}, n::Integer, m::Integer)
-            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint), n, m)
-            return FTPlan{Float64, 2, $K}(plan, n, m)
+        function $fJ(::Type{Float64}, n::Integer, l::Integer, m::Integer)
+            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), n, l, m)
+            return FTPlan{Float64, 3, $K}(plan, n, l, m)
         end
-        function lmul!(p::FTPlan{Float64, 2, $K}, x::Matrix{Float64})
-            if p.n != size(x, 1) || p.m != size(x, 2)
-                throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
-            end
-            ccall(($(string(fE)), libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), p, x, size(x, 1), size(x, 2))
+        adjoint(p::FTPlan{T, 3, $K}) where T = AdjointFTPlan(p, $fadJ(T, p.n, p.l, p.m))
+        transpose(p::FTPlan{T, 3, $K}) where T = TransposeFTPlan(p, $fadJ(T, p.n, p.l, p.m))
+        function lmul!(p::FTPlan{Float64, 3, $K}, x::Array{Float64, 3})
+            checksize(p, x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2), size(x, 3))
+            return x
+        end
+        function lmul!(p::AdjointFTPlan{Float64, FTPlan{Float64, 3, $K}}, x::Array{Float64, 3})
+            checksize(p, x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2), size(x, 3))
+            return x
+        end
+        function lmul!(p::TransposeFTPlan{Float64, FTPlan{Float64, 3, $K}}, x::Array{Float64, 3})
+            checksize(p, x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2), size(x, 3))
             return x
         end
     end
 end
 
-plan_tet_synthesis(x::Array{T, 3}) where T = plan_tet_synthesis(T, size(x, 1), size(x, 2), size(x, 3))
-plan_tet_synthesis(::Type{Complex{T}}, x...) where T <: Real = plan_tet_synthesis(T, x...)
-
-function plan_tet_synthesis(::Type{Float64}, n::Integer, l::Integer, m::Integer)
-    plan = ccall((:ft_plan_tet_synthesis, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), n, l, m)
-    return FTPlan{Float64, 3, TETRAHEDRONSYNTHESIS}(plan, n, l, m)
-end
-
-function lmul!(p::FTPlan{Float64, 3, TETRAHEDRONSYNTHESIS}, x::Array{Float64, 3})
-    if p.n != size(x, 1) || p.l != size(x, 2) || p.m != size(x, 3)
-        throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.l) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2)) × $(size(x, 3))"))
-    end
-    ccall((:ft_execute_tet_synthesis, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), p, x, size(x, 1), size(x, 2), size(x, 3))
-    return x
-end
-
-plan_tet_analysis(x::Array{T, 3}) where T = plan_tet_analysis(T, size(x, 1), size(x, 2), size(x, 3))
-plan_tet_analysis(::Type{Complex{T}}, x...) where T <: Real = plan_tet_analysis(T, x...)
-
-function plan_tet_analysis(::Type{Float64}, n::Integer, l::Integer, m::Integer)
-    plan = ccall((:ft_plan_tet_analysis, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), n, l, m)
-    return FTPlan{Float64, 3, TETRAHEDRONANALYSIS}(plan, n, l, m)
-end
-
-function lmul!(p::FTPlan{Float64, 3, TETRAHEDRONANALYSIS}, x::Array{Float64, 3})
-    if p.n != size(x, 1) || p.l != size(x, 2) || p.m != size(x, 3)
-        throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.l) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2)) × $(size(x, 3))"))
-    end
-    ccall((:ft_execute_tet_analysis, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint, Cint), p, x, size(x, 1), size(x, 2), size(x, 3))
-    return x
-end
-
-plan_spinsph_synthesis(x::Matrix{T}, s::Integer) where T = plan_spinsph_synthesis(T, size(x, 1), size(x, 2), s)
-
-function plan_spinsph_synthesis(::Type{Complex{Float64}}, n::Integer, m::Integer, s::Integer)
-    plan = ccall((:ft_plan_spinsph_synthesis, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), n, m, s)
-    return FTPlan{Complex{Float64}, 2, SPINSPHERESYNTHESIS}(plan, n, m)
-end
-
-function lmul!(p::FTPlan{Complex{Float64}, 2, SPINSPHERESYNTHESIS}, x::Matrix{Complex{Float64}})
-    if p.n != size(x, 1) || p.m != size(x, 2)
-        throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
-    end
-    ccall((:ft_execute_spinsph_synthesis, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), p, x, size(x, 1), size(x, 2))
-    return x
-end
-
-plan_spinsph_analysis(x::Matrix{T}, s::Integer) where T = plan_spinsph_analysis(T, size(x, 1), size(x, 2), s)
-
-function plan_spinsph_analysis(::Type{Complex{Float64}}, n::Integer, m::Integer, s::Integer)
-    plan = ccall((:ft_plan_spinsph_analysis, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), n, m, s)
-    return FTPlan{Complex{Float64}, 2, SPINSPHEREANALYSIS}(plan, n, m)
-end
-
-function lmul!(p::FTPlan{Complex{Float64}, 2, SPINSPHEREANALYSIS}, x::Matrix{Complex{Float64}})
-    if p.n != size(x, 1) || p.m != size(x, 2)
-        throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.m), x has dimensions $(size(x, 1)) × $(size(x, 2))"))
+for (fJ, fadJ, fC, fE, K) in ((:plan_spinsph_synthesis, :plan_spinsph_analysis, :ft_plan_spinsph_synthesis, :ft_execute_spinsph_synthesis, SPINSPHERESYNTHESIS),
+                              (:plan_spinsph_analysis, :plan_spinsph_synthesis, :ft_plan_spinsph_analysis, :ft_execute_spinsph_analysis, SPINSPHEREANALYSIS))
+    @eval begin
+        $fJ(x::Matrix{T}, s::Integer) where T = $fJ(T, size(x, 1), size(x, 2), s)
+        function $fJ(::Type{Complex{Float64}}, n::Integer, m::Integer, s::Integer)
+            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), n, m, s)
+            return FTPlan{Complex{Float64}, 2, $K}(plan, n, m)
+        end
+        get_spin(p::FTPlan{T, 2, $K}) where T = ccall((:ft_get_spin_spinsphere_fftw_plan, libfasttransforms), Cint, (Ptr{ft_plan_struct},), p)
+        adjoint(p::FTPlan{T, 2, $K}) where T = AdjointFTPlan(p, $fadJ(T, p.n, p.m, get_spin(p)))
+        transpose(p::FTPlan{T, 2, $K}) where T = TransposeFTPlan(p, $fadJ(T, p.n, p.m, get_spin(p)))
+        function lmul!(p::FTPlan{Complex{Float64}, 2, $K}, x::Matrix{Complex{Float64}})
+            checksize(p, x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function lmul!(p::AdjointFTPlan{Complex{Float64}, FTPlan{Complex{Float64}, 2, $K}}, x::Matrix{Complex{Float64}})
+            checksize(p, x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'C', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function lmul!(p::TransposeFTPlan{Complex{Float64}, FTPlan{Complex{Float64}, 2, $K}}, x::Matrix{Complex{Float64}})
+            checksize(p, x)
+            conj!(x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'C', p, x, size(x, 1), size(x, 2))
+            conj!(x)
+            return x
+        end
     end
-    ccall((:ft_execute_spinsph_analysis, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), p, x, size(x, 1), size(x, 2))
-    return x
 end
 
 function plan_sph_isometry(::Type{Float64}, n::Integer)
@@ -834,17 +867,17 @@ for (fJ, fC) in ((:lmul!, :ft_mpfr_trmm_ptr),
 end
 
 for (fJ, fC, T, N, K) in ((:lmul!, :ft_execute_sph2fourier, Float64, 2, SPHERE),
-                    (:ldiv!, :ft_execute_fourier2sph, Float64, 2, SPHERE),
-                    (:lmul!, :ft_execute_sphv2fourier, Float64, 2, SPHEREV),
-                    (:ldiv!, :ft_execute_fourier2sphv, Float64, 2, SPHEREV),
-                    (:lmul!, :ft_execute_spinsph2fourier, Complex{Float64}, 2, SPINSPHERE),
-                    (:ldiv!, :ft_execute_fourier2spinsph, Complex{Float64}, 2, SPINSPHERE),
-                    (:lmul!, :ft_execute_disk2cxf, Float64, 2, DISK),
-                    (:ldiv!, :ft_execute_cxf2disk, Float64, 2, DISK),
-                    (:lmul!, :ft_execute_rectdisk2cheb, Float64, 2, RECTDISK),
-                    (:ldiv!, :ft_execute_cheb2rectdisk, Float64, 2, RECTDISK),
-                    (:lmul!, :ft_execute_tri2cheb, Float64, 2, TRIANGLE),
-                    (:ldiv!, :ft_execute_cheb2tri, Float64, 2, TRIANGLE))
+                          (:ldiv!, :ft_execute_fourier2sph, Float64, 2, SPHERE),
+                          (:lmul!, :ft_execute_sphv2fourier, Float64, 2, SPHEREV),
+                          (:ldiv!, :ft_execute_fourier2sphv, Float64, 2, SPHEREV),
+                          (:lmul!, :ft_execute_spinsph2fourier, Complex{Float64}, 2, SPINSPHERE),
+                          (:ldiv!, :ft_execute_fourier2spinsph, Complex{Float64}, 2, SPINSPHERE),
+                          (:lmul!, :ft_execute_disk2cxf, Float64, 2, DISK),
+                          (:ldiv!, :ft_execute_cxf2disk, Float64, 2, DISK),
+                          (:lmul!, :ft_execute_rectdisk2cheb, Float64, 2, RECTDISK),
+                          (:ldiv!, :ft_execute_cheb2rectdisk, Float64, 2, RECTDISK),
+                          (:lmul!, :ft_execute_tri2cheb, Float64, 2, TRIANGLE),
+                          (:ldiv!, :ft_execute_cheb2tri, Float64, 2, TRIANGLE))
     @eval begin
         function $fJ(p::FTPlan{$T, $N, $K}, x::Array{$T, $N})
             checksize(p, x)
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 79a6fd5e..3b3379b5 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -90,6 +90,12 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         C = ps*(p*A)
         A = p\(pa*C)
         @test A ≈ B
+        C = ps'*(p'A)
+        A = p'\(pa'C)
+        @test A ≈ B
+        C = transpose(ps)*(transpose(p)*A)
+        A = transpose(p)\(transpose(pa)*C)
+        @test A ≈ B
     end
 
     A = sphones(Float64, n, 2n-1)
@@ -147,6 +153,7 @@ FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
     pa = plan_tri_analysis(A)
     test_nd_plans(p, ps, pa, A)
 
+    α, β, γ, δ = -0.1, -0.2, -0.3, -0.4
     A = tetones(Float64, n, n, n)
     p = plan_tet2cheb(A, α, β, γ, δ)
     ps = plan_tet_synthesis(A)

From 0cba2dee766e5d4adaf24c6abd134e1640d79421 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 10 Feb 2021 15:56:33 -0600
Subject: [PATCH 068/222] add developer documentation

close #134
---
 docs/make.jl    |  1 +
 docs/src/dev.md | 61 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 docs/src/dev.md

diff --git a/docs/make.jl b/docs/make.jl
index 534ff439..bd40d517 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -36,6 +36,7 @@ makedocs(
             authors = "Richard Mikael Slevinsky",
             pages = Any[
                     "Home" => "index.md",
+                    "Development" => "dev.md",
                     "Examples" => [
                         "generated/automaticdifferentiation.md",
                         "generated/chebyshev.md",
diff --git a/docs/src/dev.md b/docs/src/dev.md
new file mode 100644
index 00000000..5d371842
--- /dev/null
+++ b/docs/src/dev.md
@@ -0,0 +1,61 @@
+# Development Documentation
+
+The core of `FastTransforms.jl` is developed in parallel with the [C library](https://github.com/MikaelSlevinsky/FastTransforms) of the same name. Julia and C interoperability is enhanced by the [BinaryBuilder](https://github.com/JuliaPackaging/BinaryBuilder.jl) infrastructure, which provides the user with a safe and seamless experience of using a package written in a different language.
+
+## Why two packages?
+
+Orthogonal polynomial transforms are performance-sensitive imperative tasks. Yet, many of Julia's rich and evolving language features are simply unnecessary for defining these computational routines. Moreover, rapid language changes in Julia (as compared to C) have been more than a perturbation to this repository in the past.
+
+The C library generates assembly for vectorized operations such as single instruction multiple data (SIMD) that is more efficient than that generated by a compiler without human intervention. It also uses OpenMP to introduce shared memory parallelism for large tasks. Finally, calling into precompiled binaries reduces the Julia package's pre-compilation, improving the user experience. Some of these capabilities also exist in Julia, but with C there is just more control over performance.
+
+C libraries are easier to call from any other language, partly explaining why the Python package manager Spack [already supports the C library](https://spack.readthedocs.io/en/latest/package_list.html#fasttransforms) through third-party efforts.
+
+## The developer's right to build from source
+
+Precompiled binaries are important for users, but development in C may be greatly accelerated by coupling it with a dynamic language such as Julia. For this reason, the repository preserves the developer's right to build the C library from source by setting an environment variable to trigger the build script:
+
+```julia
+julia> ENV["FT_BUILD_FROM_SOURCE"] = "true"
+"true"
+
+(@v1.5) pkg> build FastTransforms
+   Building FFTW ──────────→ `~/.julia/packages/FFTW/ayqyZ/deps/build.log`
+   Building TimeZones ─────→ `~/.julia/packages/TimeZones/K98G0/deps/build.log`
+   Building FastTransforms → `~/.julia/dev/FastTransforms/deps/build.log`
+
+julia> using FastTransforms
+[ Info: Precompiling FastTransforms [057dd010-8810-581a-b7be-e3fc3b93f78c]
+
+```
+
+This lets the developer experiment with new features through `ccall`ing into bleeding-edge source code. Customizing the build script further allows the developer to track a different branch or even a fork.
+
+## From release to release to release
+
+To get from a C library release to a Julia package release, the developer needs to update Yggdrasil's [build_tarballs.jl](https://github.com/JuliaPackaging/Yggdrasil/blob/master/F/FastTransforms/build_tarballs.jl) script for the new version and its 256-bit SHA. On macOS, the SHA can be found by:
+
+```julia
+julia> run(`curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.5.0 --output FastTransforms-0.5.0.tar.gz`)
+  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
+                                 Dload  Upload   Total   Spent    Left  Speed
+100  156k    0  156k    0     0   351k      0 --:--:-- --:--:-- --:--:--  350k
+Process(`curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.5.0 --output FastTransforms-0.5.0.tar.gz`, ProcessExited(0))
+
+julia> run(`shasum -a 256 FastTransforms-0.5.0.tar.gz`)
+9556d0037bd5348a33f15ad6100e32053b6e22cab16a97c504f30d6c52fd0efd  FastTransforms-0.5.0.tar.gz
+Process(`shasum -a 256 FastTransforms-0.5.0.tar.gz`, ProcessExited(0))
+
+julia> run(`rm -f FastTransforms-0.5.0.tar.gz`)
+Process(`rm -f FastTransforms-0.5.0.tar.gz`, ProcessExited(0))
+```
+
+Then we wait for the friendly folks at [JuliaPackaging](https://github.com/JuliaPackaging) to merge the pull request to Yggdrasil, triggering a new release of the [FastTransforms_jll.jl](https://github.com/JuliaBinaryWrappers/FastTransforms_jll.jl) meta package that stores all precompiled binaries. With this release, we update the FastTransforms.jl [Project.toml](https://github.com/JuliaApproximation/FastTransforms.jl/blob/master/Project.toml) to point to the latest release and register the new version.
+
+Since development of Yggdrasil is quite rapid, a fork may easily become stale. Git permits the developer to forcibly reset the master branch of a fork so that it matches upstream master:
+
+```
+git fetch upstream
+git checkout master
+git reset --hard upstream/master
+git push origin master --force
+```

From daecadaf9c59d65b15140dd3b545ce4f8e023528 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 10 Feb 2021 16:27:58 -0600
Subject: [PATCH 069/222] n-d adjoints and transposes

---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index ed99a1a4..ad13ea1d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.11.3"
+version = "0.12.0"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -25,7 +25,7 @@ BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.4.1"
+FastTransforms_jll = "0.5.0"
 FillArrays = "0.9, 0.10, 0.11"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1"

From 3b3bb32397d977f7aa8b9df7f42a4dbb679427b7 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 11 Feb 2021 10:37:48 -0600
Subject: [PATCH 070/222] expand docs

---
 docs/src/dev.md | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/docs/src/dev.md b/docs/src/dev.md
index 5d371842..869d75b8 100644
--- a/docs/src/dev.md
+++ b/docs/src/dev.md
@@ -6,10 +6,14 @@ The core of `FastTransforms.jl` is developed in parallel with the [C library](ht
 
 Orthogonal polynomial transforms are performance-sensitive imperative tasks. Yet, many of Julia's rich and evolving language features are simply unnecessary for defining these computational routines. Moreover, rapid language changes in Julia (as compared to C) have been more than a perturbation to this repository in the past.
 
-The C library generates assembly for vectorized operations such as single instruction multiple data (SIMD) that is more efficient than that generated by a compiler without human intervention. It also uses OpenMP to introduce shared memory parallelism for large tasks. Finally, calling into precompiled binaries reduces the Julia package's pre-compilation, improving the user experience. Some of these capabilities also exist in Julia, but with C there is just more control over performance.
+The C library generates assembly for vectorized operations such as single instruction multiple data (SIMD) that is more efficient than that generated by a compiler without human intervention. It also uses OpenMP to introduce shared memory parallelism for large tasks. Finally, calling into precompiled binaries reduces the Julia package's pre-compilation and dependencies, improving the user experience. Some of these capabilities also exist in Julia, but with C there is frankly more control over performance.
 
 C libraries are easier to call from any other language, partly explaining why the Python package manager Spack [already supports the C library](https://spack.readthedocs.io/en/latest/package_list.html#fasttransforms) through third-party efforts.
 
+In Julia, a parametric composite type with unrestricted type parameters is just about as big as `Any`. Such a type allows the Julia API to far exceed the C API in its ability to unify all of the orthogonal polynomial transforms and present them as linear operators. The `mutable struct FTPlan{T, N, K}`, together with `AdjointFTPlan` and `TransposeFTPlan`, are the core Julia types in this repository. Whereas `T` is understood to represent element type of the plan and `N` represents the number of leading dimensions of the array on which it operates, `K` is a mere integer which serves to distinguish the orthogonal polynomials at play. For example, `FTPlan{Float64, 1, LEG2CHEB}` represents the necessary pre-computations to convert 64-bit Legendre series to Chebyshev series (of the first kind). `N == 1` because Chebyshev and Legendre series are naturally represented with vectors of coefficients. However, this particular plan may operate not only on vectors but also on matrices, column-by-column.
+
+!!! note When working with specialized `FTPlan`s, it is prudent to use the named constants for `K`, such as `FastTransforms.LEG2CHEB`, rather than their literal integer values as these may change when future plans become operational.
+
 ## The developer's right to build from source
 
 Precompiled binaries are important for users, but development in C may be greatly accelerated by coupling it with a dynamic language such as Julia. For this reason, the repository preserves the developer's right to build the C library from source by setting an environment variable to trigger the build script:
@@ -35,18 +39,35 @@ This lets the developer experiment with new features through `ccall`ing into ble
 To get from a C library release to a Julia package release, the developer needs to update Yggdrasil's [build_tarballs.jl](https://github.com/JuliaPackaging/Yggdrasil/blob/master/F/FastTransforms/build_tarballs.jl) script for the new version and its 256-bit SHA. On macOS, the SHA can be found by:
 
 ```julia
-julia> run(`curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.5.0 --output FastTransforms-0.5.0.tar.gz`)
+shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.5.0 --output FastTransforms-0.5.0.tar.gz
   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                  Dload  Upload   Total   Spent    Left  Speed
-100  156k    0  156k    0     0   351k      0 --:--:-- --:--:-- --:--:--  350k
-Process(`curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.5.0 --output FastTransforms-0.5.0.tar.gz`, ProcessExited(0))
+100  156k  100  156k    0     0   349k      0 --:--:-- --:--:-- --:--:--  348k
 
-julia> run(`shasum -a 256 FastTransforms-0.5.0.tar.gz`)
+shell> shasum -a 256 FastTransforms-0.5.0.tar.gz
 9556d0037bd5348a33f15ad6100e32053b6e22cab16a97c504f30d6c52fd0efd  FastTransforms-0.5.0.tar.gz
-Process(`shasum -a 256 FastTransforms-0.5.0.tar.gz`, ProcessExited(0))
 
-julia> run(`rm -f FastTransforms-0.5.0.tar.gz`)
-Process(`rm -f FastTransforms-0.5.0.tar.gz`, ProcessExited(0))
+shell> rm -f FastTransforms-0.5.0.tar.gz
+
+```
+
+Using [SHA.jl](https://github.com/JuliaCrypto/SHA.jl), the SHA can also be found by:
+
+```julia
+shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.5.0 --output FastTransforms-0.5.0.tar.gz
+  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
+                                 Dload  Upload   Total   Spent    Left  Speed
+100  156k    0  156k    0     0   443k      0 --:--:-- --:--:-- --:--:--  443k
+
+julia> using SHA
+
+julia> open("FastTransforms-0.5.0.tar.gz") do f
+           bytes2hex(sha256(f))
+       end
+"9556d0037bd5348a33f15ad6100e32053b6e22cab16a97c504f30d6c52fd0efd"
+
+shell> rm -f FastTransforms-0.5.0.tar.gz
+
 ```
 
 Then we wait for the friendly folks at [JuliaPackaging](https://github.com/JuliaPackaging) to merge the pull request to Yggdrasil, triggering a new release of the [FastTransforms_jll.jl](https://github.com/JuliaBinaryWrappers/FastTransforms_jll.jl) meta package that stores all precompiled binaries. With this release, we update the FastTransforms.jl [Project.toml](https://github.com/JuliaApproximation/FastTransforms.jl/blob/master/Project.toml) to point to the latest release and register the new version.

From 79224280daab24fa967f7d66c59b9050ec946dea Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 11 Feb 2021 10:57:57 -0600
Subject: [PATCH 071/222] put the note on its own line

---
 docs/src/dev.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/src/dev.md b/docs/src/dev.md
index 869d75b8..20e055e9 100644
--- a/docs/src/dev.md
+++ b/docs/src/dev.md
@@ -12,7 +12,8 @@ C libraries are easier to call from any other language, partly explaining why th
 
 In Julia, a parametric composite type with unrestricted type parameters is just about as big as `Any`. Such a type allows the Julia API to far exceed the C API in its ability to unify all of the orthogonal polynomial transforms and present them as linear operators. The `mutable struct FTPlan{T, N, K}`, together with `AdjointFTPlan` and `TransposeFTPlan`, are the core Julia types in this repository. Whereas `T` is understood to represent element type of the plan and `N` represents the number of leading dimensions of the array on which it operates, `K` is a mere integer which serves to distinguish the orthogonal polynomials at play. For example, `FTPlan{Float64, 1, LEG2CHEB}` represents the necessary pre-computations to convert 64-bit Legendre series to Chebyshev series (of the first kind). `N == 1` because Chebyshev and Legendre series are naturally represented with vectors of coefficients. However, this particular plan may operate not only on vectors but also on matrices, column-by-column.
 
-!!! note When working with specialized `FTPlan`s, it is prudent to use the named constants for `K`, such as `FastTransforms.LEG2CHEB`, rather than their literal integer values as these may change when future plans become operational.
+!!! note
+    When working with specialized `FTPlan`s, it is prudent to use the named constants for `K`, such as `FastTransforms.LEG2CHEB`, rather than their literal integer values as these may change when future plans become operational.
 
 ## The developer's right to build from source
 

From ec81139a4222aecb644d9a9ef0493e6f4653f42b Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Tue, 16 Feb 2021 16:57:43 +0000
Subject: [PATCH 072/222] Update Project.toml

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index ad13ea1d..422d3bd2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -20,7 +20,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "0.5, 1.0"
-ArrayLayouts = "0.4, 0.5"
+ArrayLayouts = "0.4, 0.5, 0.6"
 BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"

From 2ee166d75ab09b8b444fb18b6a251ba74df5c10a Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Wed, 17 Feb 2021 15:46:55 +0000
Subject: [PATCH 073/222] v0.12.1

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 422d3bd2..eab7bcf9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.12.0"
+version = "0.12.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From 7c82665d15ab349eea30aaa57ab4240228f7ac33 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 17 Mar 2021 14:39:49 -0500
Subject: [PATCH 074/222] Fix spin-weighted with |m| = 1

Close #139 and #136
---
 Project.toml             |  4 ++--
 deps/build.jl            |  2 ++
 src/libfasttransforms.jl | 11 +++++------
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/Project.toml b/Project.toml
index eab7bcf9..6c12fd40 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.12.1"
+version = "0.12.2"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -25,7 +25,7 @@ BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.5.0"
+FastTransforms_jll = "0.5.1"
 FillArrays = "0.9, 0.10, 0.11"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1"
diff --git a/deps/build.jl b/deps/build.jl
index 38cd3766..944f1e2e 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -42,4 +42,6 @@ if ft_build_from_source == "true"
         print_error()
     end
     println("FastTransforms built from source.")
+else
+    println("FastTransforms using precompiled binaries.")
 end
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 0fdf681d..9a073e29 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -10,13 +10,14 @@ else
     using FastTransforms_jll
 end
 
-function ft_fftw_plan_with_nthreads(n::Integer)
-    ccall((:ft_fftw_plan_with_nthreads, libfasttransforms), Cvoid, (Cint, ), n)
-end
+ft_set_num_threads(n::Integer) = ccall((:ft_set_num_threads, libfasttransforms), Cvoid, (Cint, ), n)
+ft_fftw_plan_with_nthreads(n::Integer) = ccall((:ft_fftw_plan_with_nthreads, libfasttransforms), Cvoid, (Cint, ), n)
 
 function __init__()
+    n = ceil(Int, Sys.CPU_THREADS/2)
+    ft_set_num_threads(n)
     ccall((:ft_fftw_init_threads, libfasttransforms), Cint, ())
-    ft_fftw_plan_with_nthreads(ceil(Int, Sys.CPU_THREADS/2))
+    ft_fftw_plan_with_nthreads(n)
 end
 
 
@@ -49,8 +50,6 @@ function renew!(x::Array{BigFloat})
     return x
 end
 
-set_num_threads(n::Integer) = ccall((:ft_set_num_threads, libfasttransforms), Cvoid, (Cint, ), n)
-
 function horner!(c::Vector{Float64}, x::Vector{Float64}, f::Vector{Float64})
     @assert length(x) == length(f)
     ccall((:ft_horner, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, 1, length(x), x, f)

From 63d74c199c6ea2d1dac20c4763e226e244b758a0 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 17 Mar 2021 14:48:21 -0500
Subject: [PATCH 075/222] update ft_set_num_threads everywhere

---
 test/libfasttransformstests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 3b3379b5..818938fa 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -1,6 +1,6 @@
 using FastTransforms, Test
 
-FastTransforms.set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
+FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
 
 @testset "libfasttransforms" begin
     n = 64

From a46b963989a00d113f4a36222eb564b91c89921d Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Sun, 28 Mar 2021 11:12:07 -0500
Subject: [PATCH 076/222] fix UndefVarError in tests

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 6c12fd40..cc0e12fb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.12.2"
+version = "0.12.3"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From feb2c18fc2b1ba1e2a7a60feb04289fc17d7b4e6 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Sat, 8 May 2021 20:39:28 +0100
Subject: [PATCH 077/222] Update Project.toml (#143)

---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index cc0e12fb..14b40770 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.12.3"
+version = "0.12.4"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -20,7 +20,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "0.5, 1.0"
-ArrayLayouts = "0.4, 0.5, 0.6"
+ArrayLayouts = "0.4, 0.5, 0.6, 0.7"
 BinaryProvider = "0.5"
 DSP = "0.6"
 FFTW = "1"

From 1b492a0be8f953220a3c9d91d4149b05d67a7a49 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Sun, 1 Aug 2021 09:47:09 +0100
Subject: [PATCH 078/222] Update CompatHelper.yml

---
 .github/workflows/CompatHelper.yml | 36 +++++++++++++++---------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index 4b384d23..7784f241 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -1,26 +1,26 @@
 name: CompatHelper
-
 on:
   schedule:
-    - cron: '00 * * * *'
-
+    - cron: 0 0 * * *
+  workflow_dispatch:
 jobs:
-  build:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        julia-version: [1.3.0]
-        julia-arch: [x86]
-        os: [ubuntu-latest]
+  CompatHelper:
+    runs-on: ubuntu-latest
     steps:
-      - uses: julia-actions/setup-julia@latest
-        with:
-          version: ${{ matrix.julia-version }}
-      - name: Install dependencies
-        run: julia -e 'using Pkg; Pkg.add(Pkg.PackageSpec(name = "CompatHelper", url = "https://github.com/bcbi/CompatHelper.jl.git"))'
-      - name: CompatHelper.main
+      - name: "Install CompatHelper"
+        run: |
+          import Pkg
+          name = "CompatHelper"
+          uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
+          version = "2"
+          Pkg.add(; name, uuid, version)
+        shell: julia --color=yes {0}
+      - name: "Run CompatHelper"
+        run: |
+          import CompatHelper
+          CompatHelper.main()
+        shell: julia --color=yes {0}
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }}
-          JULIA_DEBUG: CompatHelper
-        run: julia -e 'using CompatHelper; CompatHelper.main()'
+          # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }}

From f46dd9b9ac5c7269a03f61817dcd16824e3f0a3e Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sun, 1 Aug 2021 11:00:34 +0100
Subject: [PATCH 079/222] CompatHelper: bump compat for "ToeplitzMatrices" to
 "0.7" (#147)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 14b40770..39a62d43 100644
--- a/Project.toml
+++ b/Project.toml
@@ -29,5 +29,5 @@ FastTransforms_jll = "0.5.1"
 FillArrays = "0.9, 0.10, 0.11"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1"
-ToeplitzMatrices = "0.6"
+ToeplitzMatrices = "0.6, 0.7"
 julia = "1.3"

From 66982c8f766c601acf7432bc13ffe9f13430b969 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sun, 1 Aug 2021 11:00:48 +0100
Subject: [PATCH 080/222] CompatHelper: bump compat for "FillArrays" to "0.12"
 (#148)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 39a62d43..21b65178 100644
--- a/Project.toml
+++ b/Project.toml
@@ -26,7 +26,7 @@ DSP = "0.6"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.5.1"
-FillArrays = "0.9, 0.10, 0.11"
+FillArrays = "0.9, 0.10, 0.11, 0.12"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1"
 ToeplitzMatrices = "0.6, 0.7"

From 19003549cc05ab558d5fc3c73af04447096c9a1c Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sun, 1 Aug 2021 11:01:03 +0100
Subject: [PATCH 081/222] CompatHelper: bump compat for "DSP" to "0.7" (#149)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 21b65178..576651e5 100644
--- a/Project.toml
+++ b/Project.toml
@@ -22,7 +22,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 AbstractFFTs = "0.5, 1.0"
 ArrayLayouts = "0.4, 0.5, 0.6, 0.7"
 BinaryProvider = "0.5"
-DSP = "0.6"
+DSP = "0.6, 0.7"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.5.1"

From 7fa959504bbfbfa04a261b9782bcaca88ce811bf Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Sun, 1 Aug 2021 11:01:24 +0100
Subject: [PATCH 082/222] v0.12.5

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 576651e5..c8f40b63 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.12.4"
+version = "0.12.5"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From bc83208bb9e9bb0b77babd12ad7cace3869ee312 Mon Sep 17 00:00:00 2001
From: Paul Nemec <58543214+Luapulu@users.noreply.github.com>
Date: Thu, 9 Sep 2021 17:51:23 +0200
Subject: [PATCH 083/222] Add explanation of lexigraphical order (#150)

---
 src/PaduaTransform.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/PaduaTransform.jl b/src/PaduaTransform.jl
index 9cba3251..86a88eca 100644
--- a/src/PaduaTransform.jl
+++ b/src/PaduaTransform.jl
@@ -1,5 +1,10 @@
 
 # lex indicates if its lexigraphical (i.e., x, y) or reverse (y, x)
+# If in lexigraphical order the coefficient vector's entries
+# correspond to the following basis polynomials:
+# [T0(x) * T0(y), T1(x) * T0(y), T0(x) * T1(y), T2(x) * T0(y), T1(x) * T1(y), T0(x) * T2(y), ...]
+# else, if not in lexigraphical order:
+# [T0(x) * T0(y), T0(x) * T1(y), T1(x) * T0(y), T0(x) * T2(y), T1(x) * T1(y), T2(x) * T0(y), ...]
 """
 Pre-plan an Inverse Padua Transform.
 """

From ef00b658ebdcb9670b74b67f795050ac23d3f6da Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 27 Sep 2021 22:07:34 +0100
Subject: [PATCH 084/222] convert to Array since copy leaves immutable (#151)

* convert to Array since copy leaves immutable

* Update ci.yml
---
 .github/workflows/ci.yml  |  3 ++-
 Project.toml              |  2 +-
 src/chebyshevtransform.jl | 16 ++++++++--------
 test/chebyshevtests.jl    |  5 +++++
 4 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 223528e7..2436087e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,7 +10,8 @@ jobs:
       matrix:
         version:
           - '1.3'
-          - '1.5'
+          - '1'
+          - '^1.7.0-0'
         os:
           - ubuntu-latest
           - macOS-latest
diff --git a/Project.toml b/Project.toml
index c8f40b63..dd8f7327 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.12.5"
+version = "0.12.6"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index e07ed280..82a61c4b 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -73,7 +73,7 @@ function *(P::ChebyshevTransformPlan{T,2,true}, x::AbstractVector{T}) where T
 end
 
 *(P::ChebyshevTransformPlan{T,k,false}, x::AbstractVector{T}) where {T,k} = 
-    ChebyshevTransformPlan{T,k,true}(P)*copy(x)
+    ChebyshevTransformPlan{T,k,true}(P)*Array(x)
 
 chebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where T =
     plan_chebyshevtransform!(x, kind)*x
@@ -85,7 +85,7 @@ chebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where T =
 transforms from values on a Chebyshev grid of the first or second kind to Chebyshev
 coefficients.
 """
-chebyshevtransform(x, kind=Val(1)) = chebyshevtransform!(copy(x), kind)
+chebyshevtransform(x, kind=Val(1)) = chebyshevtransform!(Array(x), kind)
 
 
 ## Inverse transforms take Chebyshev coefficients and produce values at Chebyshev points of the first and second kinds
@@ -165,12 +165,12 @@ function *(P::IChebyshevTransformPlan{T,2, true}, x::AbstractVector{T}) where T<
 end
 
 *(P::IChebyshevTransformPlan{T,k,false},x::AbstractVector{T}) where {T,k} = 
-    IChebyshevTransformPlan{T,k,true}(P)*copy(x)
+    IChebyshevTransformPlan{T,k,true}(P)*Array(x)
 
 ichebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where T =
     plan_ichebyshevtransform!(x, kind)*x
 
-ichebyshevtransform(x, kind=Val(1)) = ichebyshevtransform!(copy(x), kind)
+ichebyshevtransform(x, kind=Val(1)) = ichebyshevtransform!(Array(x), kind)
 
 # Matrix inputs
 #
@@ -296,9 +296,9 @@ chebyshevutransform!(x::AbstractVector{T}, kind=Val(1)) where {T<:fftwNumber} =
 transforms from values on a Chebyshev grid of the first or second kind to Chebyshev
 coefficients of the 2nd kind (Chebyshev U expansion).
 """
-chebyshevutransform(x, kind=Val(1)) = chebyshevutransform!(copy(x), kind)
+chebyshevutransform(x, kind=Val(1)) = chebyshevutransform!(Array(x), kind)
 
-*(P::ChebyshevUTransformPlan{T,k,false}, x::AbstractVector{T}) where {T,k} = ChebyshevUTransformPlan{T,k,true}(P)*copy(x)
+*(P::ChebyshevUTransformPlan{T,k,false}, x::AbstractVector{T}) where {T,k} = ChebyshevUTransformPlan{T,k,true}(P)*Array(x)
 
 ## Inverse transforms take ChebyshevU coefficients and produce values at ChebyshevU points of the first and second kinds
 
@@ -374,10 +374,10 @@ end
 ichebyshevutransform!(x::AbstractVector{T}, kind=Val(1)) where {T<:fftwNumber} =
     plan_ichebyshevutransform!(x, kind)*x
 
-ichebyshevutransform(x, kind=Val(1)) = ichebyshevutransform!(copy(x), kind)
+ichebyshevutransform(x, kind=Val(1)) = ichebyshevutransform!(Array(x), kind)
 
 *(P::IChebyshevUTransformPlan{T,k,false},x::AbstractVector{T}) where {T,k} = 
-    IChebyshevUTransformPlan{T,k,true}(P)*copy(x)
+    IChebyshevUTransformPlan{T,k,true}(P)*Array(x)
 
 
 ## Code generation for integer inputs
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 87db2131..0383df2c 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -224,4 +224,9 @@ using FastTransforms, Test
         @test plan_chebyshevtransform!(x)copy(x) ≈ chebyshevtransform(x)
         @test plan_ichebyshevtransform!(x)copy(x) ≈ ichebyshevtransform(x)
     end
+
+    @testset "immutable vectors" begin
+        F = plan_chebyshevtransform([1.,2,3])
+        @test chebyshevtransform(1.0:3) == F * (1:3)
+    end
 end

From 4199ae9ac7970390da2546aef7f290f4cf675c65 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 11 Oct 2021 16:26:58 +0100
Subject: [PATCH 085/222] Support matrix chebyshev transforms, matrix
 coefficients in Clenshaw (#152)

* in-place allocates so support out-of-place Chebyshev transform

* Update chebyshevtransform.jl

* use FIRSTKIND, etc. instead of 1, 2 (matching r2r)

* r2r kind may be tuple

* remove assert_applicable as FFTW does that for us

* 2nd kind matrix transforms

* work in itransform

* fix itransforms

* Tests pass

* Update chebyshevutransform

* Drop Julia VERSION < v1.6

* Use FFTW.R* instead of explicit numbers

* Update ichebyshevu

* Support Matrix coefficients in clenshaw

* add Clenshaw tests

* add docs
---
 .github/workflows/ci.yml  |   3 +-
 Project.toml              |   6 +-
 src/chebyshevtransform.jl | 601 ++++++++++++++++++++++++--------------
 src/clenshaw.jl           |  57 ++++
 test/chebyshevtests.jl    |  65 ++---
 test/clenshawtests.jl     |  12 +
 6 files changed, 488 insertions(+), 256 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2436087e..6c2b3b92 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,8 +9,7 @@ jobs:
     strategy:
       matrix:
         version:
-          - '1.3'
-          - '1'
+          - '1.6'
           - '^1.7.0-0'
         os:
           - ubuntu-latest
diff --git a/Project.toml b/Project.toml
index dd8f7327..7fa3cf03 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.12.6"
+version = "0.13.0"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -19,7 +19,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
-AbstractFFTs = "0.5, 1.0"
+AbstractFFTs = "1.0"
 ArrayLayouts = "0.4, 0.5, 0.6, 0.7"
 BinaryProvider = "0.5"
 DSP = "0.6, 0.7"
@@ -30,4 +30,4 @@ FillArrays = "0.9, 0.10, 0.11, 0.12"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1"
 ToeplitzMatrices = "0.6, 0.7"
-julia = "1.3"
+julia = "1.6"
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 82a61c4b..0563f26b 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -6,77 +6,136 @@ size(P::ChebyshevPlan) = isdefined(P, :plan) ? size(P.plan) : (0,)
 length(P::ChebyshevPlan) = isdefined(P, :plan) ? length(P.plan) : 0
 
 
-# Check whether a ChebyshevPlan is applicable to a given input array, and
-# throw an informative error if not:
-function assert_applicable(p::ChebyshevPlan{T}, X::StridedArray{T}) where T
-    if size(X) != size(p)
-        throw(ArgumentError("Chebyshev plan applied to wrong-size array"))
-    end
-end
+const FIRSTKIND = FFTW.REDFT10
+const SECONDKIND = FFTW.REDFT00
 
-struct ChebyshevTransformPlan{T,kind,inplace,P} <: ChebyshevPlan{T}
-    plan::FFTW.r2rFFTWPlan{T,P,true,1,UnitRange{Int}}
-    ChebyshevTransformPlan{T,kind,inplace,P}(plan) where {T,kind,inplace,P} = new{T,kind,inplace,P}(plan)
-    ChebyshevTransformPlan{T,kind,inplace,P}() where {T,kind,inplace,P} = new{T,kind,inplace,P}()
+struct ChebyshevTransformPlan{T,kind,K,inplace,N,R} <: ChebyshevPlan{T}
+    plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}
+    ChebyshevTransformPlan{T,kind,K,inplace,N,R}(plan) where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}(plan)
+    ChebyshevTransformPlan{T,kind,K,inplace,N,R}() where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}()
 end
 
-ChebyshevTransformPlan{T,kind,inplace}(plan::FFTW.r2rFFTWPlan{T,P}) where {T,kind,inplace,P} = 
-    ChebyshevTransformPlan{T,kind,inplace,P}(plan)
-
-ChebyshevTransformPlan{T,kind,inplace}(plan::ChebyshevTransformPlan{T,kind,inp,P}) where {T,kind,inplace,inp,P} = 
-    ChebyshevTransformPlan{T,kind,inplace,P}(plan.plan)
+ChebyshevTransformPlan{T,kind,K}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
+    ChebyshevTransformPlan{T,kind,K,inplace,N,R}(plan)
 
-function plan_chebyshevtransform!(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+# jump through some hoops to make inferrable
+@inline kindtuple(KIND,N) = ntuple(_ -> KIND,N)
+@inline kindtuple(KIND,N,::Integer) = (KIND,)
+function plan_chebyshevtransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevTransformPlan{T,1,true,(5,)}()
+        ChebyshevTransformPlan{T,1,kindtuple(FIRSTKIND,N,dims...),true,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
     else
-        ChebyshevTransformPlan{T,1,true,(5,)}(FFTW.plan_r2r!(x, FFTW.REDFT10))
+        ChebyshevTransformPlan{T,1,kindtuple(FIRSTKIND,N,dims...)}(FFTW.plan_r2r!(x, FIRSTKIND, dims...; kws...))
     end
 end
-function plan_chebyshevtransform!(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
-    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
-    ChebyshevTransformPlan{T,2,true,(3,)}(FFTW.plan_r2r!(x, FFTW.REDFT00))
+function plan_chebyshevtransform!(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
+    any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
+    ChebyshevTransformPlan{T,2,kindtuple(SECONDKIND,N,dims...)}(FFTW.plan_r2r!(x, SECONDKIND, dims...; kws...))
 end
 
 
-function plan_chebyshevtransform(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+function plan_chebyshevtransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevTransformPlan{T,1,false,(5,)}()
+        ChebyshevTransformPlan{T,1,kindtuple(FIRSTKIND,N,dims...),false,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
     else
-        ChebyshevTransformPlan{T,1,false,(5,)}(FFTW.plan_r2r!(x, FFTW.REDFT10))
+        ChebyshevTransformPlan{T,1,kindtuple(FIRSTKIND,N,dims...)}(FFTW.plan_r2r(x, FIRSTKIND, dims...; kws...))
     end
 end
-function plan_chebyshevtransform(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
-    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
-    ChebyshevTransformPlan{T,2,false,(3,)}(FFTW.plan_r2r!(x, FFTW.REDFT00))
+function plan_chebyshevtransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
+    any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
+    ChebyshevTransformPlan{T,2,kindtuple(SECONDKIND,N,dims...)}(FFTW.plan_r2r(x, SECONDKIND, dims...; kws...))
 end
 
-plan_chebyshevtransform!(x::AbstractVector) = plan_chebyshevtransform!(x, Val(1))
-plan_chebyshevtransform(x::AbstractVector) = plan_chebyshevtransform(x, Val(1))
+plan_chebyshevtransform!(x::AbstractArray, dims...; kws...) = plan_chebyshevtransform!(x, Val(1), dims...; kws...)
+plan_chebyshevtransform(x::AbstractArray, dims...; kws...) = plan_chebyshevtransform(x, Val(1), dims...; kws...)
+
+
+# convert x if necessary
+@inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::StridedArray{T}) where T = mul!(y, P, x)
+@inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::AbstractArray) where T = mul!(y, P, convert(Array{T}, x))
 
+@inline _cheb1_rescale!(_, y::AbstractVector) = (y[1] /= 2; ldiv!(length(y), y))
+
+@inline function _cheb1_rescale!(d::Number, y::AbstractMatrix{T}) where T
+    if isone(d)
+        ldiv!(2, view(y,1,:))
+    else
+        ldiv!(2, view(y,:,1))
+    end
+    ldiv!(size(y,d), y)
+end
 
-function *(P::ChebyshevTransformPlan{T,1,true}, x::AbstractVector{T}) where T
+# TODO: higher dimensional arrays
+@inline function _cheb1_rescale!(d::UnitRange, y::AbstractMatrix{T}) where T
+    @assert d == 1:2
+    ldiv!(2, view(y,1,:))
+    ldiv!(2, view(y,:,1))
+    ldiv!(prod(size(y)), y)
+end
+
+function *(P::ChebyshevTransformPlan{T,1,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
     n = length(x)
-    assert_applicable(P, x)
     n == 0 && return x
 
     y = P.plan*x # will be  === x if in-place
-    y[1] /= 2
-    lmul!(inv(convert(T,n)), y)
+    _cheb1_rescale!(P.plan.region, y)
 end
 
-function *(P::ChebyshevTransformPlan{T,2,true}, x::AbstractVector{T}) where T
+function mul!(y::AbstractArray{T,N}, P::ChebyshevTransformPlan{T,1,K,false,N}, x::AbstractArray{<:Any,N}) where {T,K,N}
+    n = length(x)
+    length(y) == n || throw(DimensionMismatch("output must match dimension"))
+    n == 0 && return y
+    _plan_mul!(y, P.plan, x)
+    _cheb1_rescale!(P.plan.region, y)
+end
+
+
+_cheb2_rescale!(_, y::AbstractVector) = (y[1] /= 2; y[end] /= 2; ldiv!(length(y)-1, y))
+
+function _cheb2_rescale!(d::Number, y::AbstractMatrix{T}) where T
+    if isone(d)
+        ldiv!(2, @view(y[1,:]))
+        ldiv!(2, @view(y[end,:]))
+    else
+        ldiv!(2, @view(y[:,1]))
+        ldiv!(2, @view(y[:,end]))
+    end
+    ldiv!(size(y,d)-1, y)
+end
+
+# TODO: higher dimensional arrays
+function _cheb2_rescale!(d::UnitRange, y::AbstractMatrix{T}) where T
+    @assert d == 1:2
+    ldiv!(2, @view(y[1,:]))
+    ldiv!(2, @view(y[end,:]))
+    ldiv!(2, @view(y[:,1]))
+    ldiv!(2, @view(y[:,end]))
+    ldiv!(prod(size(y) .- 1), y)
+end
+
+function *(P::ChebyshevTransformPlan{T,2,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
     n = length(x)
     y = P.plan*x # will be  === x if in-place
-    y[1] /= 2; y[end] /= 2
-    lmul!(inv(convert(T,n-1)),y)
+    _cheb2_rescale!(P.plan.region, y)
+end
+
+function mul!(y::AbstractArray{T,N}, P::ChebyshevTransformPlan{T,2,K,false,N}, x::AbstractArray{<:Any,N}) where {T,K,N}
+    n = length(x)
+    length(y) == n || throw(DimensionMismatch("output must match dimension"))
+    _plan_mul!(y, P.plan, x)
+    _cheb2_rescale!(P.plan.region, y)
 end
 
-*(P::ChebyshevTransformPlan{T,k,false}, x::AbstractVector{T}) where {T,k} = 
-    ChebyshevTransformPlan{T,k,true}(P)*Array(x)
+*(P::ChebyshevTransformPlan{T,kind,K,false,N}, x::AbstractArray{T,N}) where {T,kind,K,N} =
+    mul!(similar(x), P, x)
 
-chebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where T =
-    plan_chebyshevtransform!(x, kind)*x
+"""
+    chebyshevtransform!(x, kind=Val(1))
+
+transforms from values on a Chebyshev grid of the first or second kind to Chebyshev
+coefficients, in-place.
+"""
+chebyshevtransform!(x, dims...; kws...) = plan_chebyshevtransform!(x, dims...; kws...)*x
 
 
 """
@@ -85,209 +144,294 @@ chebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where T =
 transforms from values on a Chebyshev grid of the first or second kind to Chebyshev
 coefficients.
 """
-chebyshevtransform(x, kind=Val(1)) = chebyshevtransform!(Array(x), kind)
+chebyshevtransform(x, dims...; kws...) = plan_chebyshevtransform(x, dims...; kws...) * x
 
 
 ## Inverse transforms take Chebyshev coefficients and produce values at Chebyshev points of the first and second kinds
 
 
-struct IChebyshevTransformPlan{T,kind,inplace,P} <: ChebyshevPlan{T}
-    plan::FFTW.r2rFFTWPlan{T,P,true,1,UnitRange{Int}}
-    IChebyshevTransformPlan{T,kind,inplace,P}(plan) where {T,kind,inplace,P} = new{T,kind,inplace,P}(plan)
-    IChebyshevTransformPlan{T,kind,inplace,P}() where {T,kind,inplace,P} = new{T,kind,inplace,P}()
-end
+const IFIRSTKIND = FFTW.REDFT01
 
-IChebyshevTransformPlan{T,kind,inplace}(F::FFTW.r2rFFTWPlan{T,P}) where {T,kind,inplace,P} = 
-    IChebyshevTransformPlan{T,kind,inplace,P}(F)
+struct IChebyshevTransformPlan{T,kind,K,inplace,N,R} <: ChebyshevPlan{T}
+    plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}
+    IChebyshevTransformPlan{T,kind,K,inplace,N,R}(plan) where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}(plan)
+    IChebyshevTransformPlan{T,kind,K,inplace,N,R}() where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}()
+end
 
-IChebyshevTransformPlan{T,kind,true}(F::IChebyshevTransformPlan{T,kind,false,P}) where {T,kind,P} = 
-    IChebyshevTransformPlan{T,kind,true,P}(F.plan)
+IChebyshevTransformPlan{T,kind,K}(F::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
+    IChebyshevTransformPlan{T,kind,K,inplace,N,R}(F)
 
-size(P::IChebyshevTransformPlan) = isdefined(P, :plan) ? size(P.plan) : (0,)
-length(P::IChebyshevTransformPlan) = isdefined(P, :plan) ? length(P.plan) : 0
 
 
 # second kind Chebyshev transforms share a plan with their inverse
 # so we support this via inv
-inv(P::ChebyshevTransformPlan{T,2,inp}) where {T,inp} = IChebyshevTransformPlan{T,2,inp}(P.plan)
-inv(P::IChebyshevTransformPlan{T,2,inp}) where {T,inp} = ChebyshevTransformPlan{T,2,inp}(P.plan)
+inv(P::ChebyshevTransformPlan{T,2,K}) where {T,K} = IChebyshevTransformPlan{T,2,K}(P.plan)
+inv(P::IChebyshevTransformPlan{T,2,K}) where {T,K} = ChebyshevTransformPlan{T,2,K}(P.plan)
 
 
 \(P::ChebyshevTransformPlan, x::AbstractArray) = inv(P) * x
 \(P::IChebyshevTransformPlan, x::AbstractArray) = inv(P) * x
 
 
-function plan_ichebyshevtransform!(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+function plan_ichebyshevtransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevTransformPlan{T,1,true,(4,)}()
+        IChebyshevTransformPlan{T,1,kindtuple(IFIRSTKIND,N,dims...),true,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
     else
-        IChebyshevTransformPlan{T,1,true,(4,)}(FFTW.plan_r2r!(x, FFTW.REDFT01))
+        IChebyshevTransformPlan{T,1,kindtuple(IFIRSTKIND,N,dims...)}(FFTW.plan_r2r!(x, IFIRSTKIND, dims...; kws...))
     end
 end
 
-function plan_ichebyshevtransform!(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
-    inv(plan_chebyshevtransform!(x, Val(2)))
+function plan_ichebyshevtransform!(x::AbstractArray{T}, ::Val{2}, dims...; kws...) where T<:fftwNumber
+    inv(plan_chebyshevtransform!(x, Val(2), dims...; kws...))
 end
 
-function plan_ichebyshevtransform(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+function plan_ichebyshevtransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevTransformPlan{T,1,false,(4,)}()
+        IChebyshevTransformPlan{T,1,kindtuple(IFIRSTKIND,N,dims...),false,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
     else
-        IChebyshevTransformPlan{T,1,false,(4,)}(FFTW.plan_r2r!(x, FFTW.REDFT01))
+        IChebyshevTransformPlan{T,1,kindtuple(IFIRSTKIND,N,dims...)}(FFTW.plan_r2r(x, IFIRSTKIND, dims...; kws...))
     end
 end
 
-function plan_ichebyshevtransform(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
-    inv(plan_chebyshevtransform(x, Val(2)))
+function plan_ichebyshevtransform(x::AbstractArray{T}, ::Val{2}, dims...; kws...) where T<:fftwNumber
+    inv(plan_chebyshevtransform(x, Val(2), dims...; kws...))
 end
 
-plan_ichebyshevtransform!(x::AbstractVector) = plan_ichebyshevtransform!(x, Val(1))
-plan_ichebyshevtransform(x::AbstractVector) = plan_ichebyshevtransform(x, Val(1))
+plan_ichebyshevtransform!(x::AbstractArray, dims...; kws...) = plan_ichebyshevtransform!(x, Val(1), dims...; kws...)
+plan_ichebyshevtransform(x::AbstractArray, dims...; kws...) = plan_ichebyshevtransform(x, Val(1), dims...; kws...)
+
+@inline _icheb1_prescale!(_, x::AbstractVector) = (x[1] *= 2)
+@inline function _icheb1_prescale!(d::Number, x::AbstractMatrix)
+    if isone(d)
+        lmul!(2, view(x,1,:))
+    else
+        lmul!(2, view(x,:,1))
+    end
+    x
+end
+@inline function _icheb1_prescale!(d::UnitRange, x::AbstractMatrix)
+    lmul!(2, view(x,:,1))
+    lmul!(2, view(x,1,:))
+    x
+end
+@inline _icheb1_postscale!(_, x::AbstractVector) = (x[1] /= 2)
+@inline function _icheb1_postscale!(d::Number, x::AbstractMatrix)
+    if isone(d)
+        ldiv!(2, view(x,1,:))
+    else
+        ldiv!(2, view(x,:,1))
+    end
+    x
+end
 
+@inline function _icheb1_postscale!(d::UnitRange, x::AbstractMatrix)
+    ldiv!(2, view(x,1,:))
+    ldiv!(2, view(x,:,1))
+    x
+end
 
-function *(P::IChebyshevTransformPlan{T,1,true}, x::AbstractVector{T}) where T<:fftwNumber
+function *(P::IChebyshevTransformPlan{T,1,K,true,N}, x::AbstractArray{T,N}) where {T<:fftwNumber,K,N}
     n = length(x)
-    assert_applicable(P, x)
     n == 0 && return x
 
-    x[1] *= 2
-    x = lmul!(convert(T,0.5), P.plan*x)
+    _icheb1_prescale!(P.plan.region, x)
+    x = ldiv!(2^length(P.plan.region), P.plan*x)
     x
-end 
-function *(P::IChebyshevTransformPlan{T,2, true}, x::AbstractVector{T}) where T<:fftwNumber
-    n = length(x)
-    assert_applicable(P, x)
-
-    x[1] *= 2; x[end] *= 2
-    x = ChebyshevTransformPlan{T,2,true}(P.plan)*x
-    x[1] *= 2; x[end] *= 2
-    lmul!(convert(T,0.5(n-1)),x)
 end
 
-*(P::IChebyshevTransformPlan{T,k,false},x::AbstractVector{T}) where {T,k} = 
-    IChebyshevTransformPlan{T,k,true}(P)*Array(x)
+function mul!(y::AbstractArray{T,N}, P::IChebyshevTransformPlan{T,1,K,false,N}, x::AbstractArray{T,N}) where {T<:fftwNumber,K,N}
+    size(y) == size(x) || throw(DimensionMismatch("output must match dimension"))
+    isempty(x) && return y
 
-ichebyshevtransform!(x::AbstractVector{T}, kind=Val(1)) where T =
-    plan_ichebyshevtransform!(x, kind)*x
-
-ichebyshevtransform(x, kind=Val(1)) = ichebyshevtransform!(Array(x), kind)
-
-# Matrix inputs
-#
-#
-function chebyshevtransform!(X::AbstractMatrix{T}, ::Val{1}) where T<:fftwNumber
-    if size(X) == (1,1)
-        X
-    else
-        X=FFTW.r2r!(X,FFTW.REDFT10)
-        X[:,1]/=2;X[1,:]/=2;
-        lmul!(1/(size(X,1)*size(X,2)),X)
-    end
+    _icheb1_prescale!(P.plan.region, x) # Todo: don't mutate x
+    _plan_mul!(y, P.plan, x)
+    _icheb1_postscale!(P.plan.region, x)
+    ldiv!(2^length(P.plan.region), y)
 end
 
-function chebyshevtransform!(X::AbstractMatrix{T}, ::Val{2}) where T<:fftwNumber
-    if size(X,1) < 2 || size(X,2) < 2
-        throw(ArgumentError("Chebyshev plan applied to wrong-size array"))
+@inline _icheb2_prescale!(_, x::AbstractVector) = (x[1] *= 2; x[end] *= 2)
+@inline function _icheb2_prescale!(d::Number, x::AbstractMatrix)
+    if isone(d)
+        lmul!(2, @view(x[1,:]))
+        lmul!(2, @view(x[end,:]))
     else
-        X=FFTW.r2r!(X,FFTW.REDFT00)
-        lmul!(1/((size(X,1)-1)*(size(X,2)-1)),X)
-        X[:,1]/=2;X[:,end]/=2
-        X[1,:]/=2;X[end,:]/=2
-        X
+        lmul!(2, @view(x[:,1]))
+        lmul!(2, @view(x[:,end]))
     end
+    x
 end
-#
-function ichebyshevtransform!(X::AbstractMatrix{T}, ::Val{1}) where T<:fftwNumber
-    if size(X) == (1,1)
-        X
-    else
-        X[1,:]*=2;X[:,1]*=2
-        X = FFTW.r2r(X,FFTW.REDFT01)
-        lmul!(0.25, X)
-    end
+@inline function _icheb2_prescale!(d::UnitRange, x::AbstractMatrix)
+    lmul!(2, @view(x[1,:]))
+    lmul!(2, @view(x[end,:]))
+    lmul!(2, @view(x[:,1]))
+    lmul!(2, @view(x[:,end]))
+    x
 end
-function ichebyshevtransform!(X::AbstractMatrix{T}, ::Val{2}) where T<:fftwNumber
-    if size(X,1) < 2 || size(X,2) < 2
-        throw(ArgumentError("Chebyshev plan applied to wrong-size array"))
+@inline _icheb2_postrescale!(_, x::AbstractVector) = (x[1] /= 2; x[end] /= 2)
+@inline function _icheb2_postrescale!(d::Number, x::AbstractMatrix)
+    if isone(d)
+        ldiv!(2, @view(x[1,:]))
+        ldiv!(2, @view(x[end,:]))
     else
-        X[1,:]*=2;X[end,:]*=2;X[:,1]*=2;X[:,end]*=2
-        X=chebyshevtransform!(X, Val(2))
-        X[1,:]*=2;X[end,:]*=2;X[:,1]*=2;X[:,end]*=2
-        lmul!((size(X,1)-1)*(size(X,2)-1)/4,X)
+        ldiv!(2, @view(x[:,1]))
+        ldiv!(2, @view(x[:,end]))
     end
+    x
+end
+@inline function _icheb2_postrescale!(d::UnitRange, x::AbstractMatrix)
+    ldiv!(2, @view(x[1,:]))
+    ldiv!(2, @view(x[end,:]))
+    ldiv!(2, @view(x[:,1]))
+    ldiv!(2, @view(x[:,end]))
+    x
+end
+@inline function _icheb2_rescale!(d::Number, y::AbstractArray{T}) where T
+    _icheb2_prescale!(d, y)
+    lmul!(convert(T, size(y,d) - 1)/2, y)
+    y
+end
+@inline function _icheb2_rescale!(d::UnitRange, y::AbstractArray{T}) where T
+    _icheb2_prescale!(d, y)
+    lmul!(prod(convert.(T, size(y) .- 1)./2), y)
+    y
 end
-#
 
+function *(P::IChebyshevTransformPlan{T,2,K,true,N}, x::AbstractArray{T,N}) where {T<:fftwNumber,K,N}
+    n = length(x)
 
-## Chebyshev U
+    _icheb2_prescale!(P.plan.region, x)
+    x = inv(P)*x
+    _icheb2_rescale!(P.plan.region, x)
+end
 
-struct ChebyshevUTransformPlan{T,kind,inplace,P} <: ChebyshevPlan{T}
-    plan::FFTW.r2rFFTWPlan{T,P,true,1,UnitRange{Int}}
-    ChebyshevUTransformPlan{T,kind,inplace,P}(plan) where {T,kind,inplace,P} = new{T,kind,inplace,P}(plan)
-    ChebyshevUTransformPlan{T,kind,inplace,P}() where {T,kind,inplace,P} = new{T,kind,inplace,P}()
+function mul!(y::AbstractArray{T,N}, P::IChebyshevTransformPlan{T,2,K,false,N}, x::AbstractArray{<:Any,N}) where {T<:fftwNumber,K,N}
+    n = length(x)
+    length(y) == n || throw(DimensionMismatch("output must match dimension"))
+
+    _icheb2_prescale!(P.plan.region, x)
+    _plan_mul!(y, inv(P), x)
+    _icheb2_postrescale!(P.plan.region, x)
+    _icheb2_rescale!(P.plan.region, y)
 end
 
-ChebyshevUTransformPlan{T,kind,inplace}(plan::FFTW.r2rFFTWPlan{T,P}) where {T,kind,inplace,P} = 
-    ChebyshevUTransformPlan{T,kind,inplace,P}(plan)
+*(P::IChebyshevTransformPlan{T,kind,K,false,N}, x::AbstractArray{T,N}) where {T,kind,K,N} = mul!(similar(x), P, x)
+ichebyshevtransform!(x::AbstractArray, dims...; kwds...) = plan_ichebyshevtransform!(x, dims...; kwds...)*x
+ichebyshevtransform(x, dims...; kwds...) = plan_ichebyshevtransform(x, dims...; kwds...)*x
 
-ChebyshevUTransformPlan{T,kind,inplace}(plan::ChebyshevUTransformPlan{T,kind,inp,P}) where {T,kind,inplace,inp,P} = 
-    ChebyshevUTransformPlan{T,kind,inplace,P}(plan.plan)
 
+## Chebyshev U
 
+const UFIRSTKIND = FFTW.RODFT10
+const USECONDKIND = FFTW.RODFT00
+
+struct ChebyshevUTransformPlan{T,kind,K,inplace,N,R} <: ChebyshevPlan{T}
+    plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}
+    ChebyshevUTransformPlan{T,kind,K,inplace,N,R}(plan) where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}(plan)
+    ChebyshevUTransformPlan{T,kind,K,inplace,N,R}() where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}()
+end
 
-function plan_chebyshevutransform!(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+ChebyshevUTransformPlan{T,kind,K}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
+    ChebyshevUTransformPlan{T,kind,K,inplace,N,R}(plan)
+
+
+function plan_chebyshevutransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevUTransformPlan{T,1,true,(9,)}()
+        ChebyshevUTransformPlan{T,1,kindtuple(UFIRSTKIND,N,dims...),true,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
     else
-        ChebyshevUTransformPlan{T,1,true,(9,)}(FFTW.plan_r2r!(x, FFTW.RODFT10))
+        ChebyshevUTransformPlan{T,1,kindtuple(UFIRSTKIND,N,dims...)}(FFTW.plan_r2r!(x, UFIRSTKIND, dims...; kws...))
     end
 end
-function plan_chebyshevutransform!(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
-    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
-    ChebyshevUTransformPlan{T,2,true,(7,)}(FFTW.plan_r2r!(x, FFTW.RODFT00))
+function plan_chebyshevutransform!(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
+    any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
+    ChebyshevUTransformPlan{T,2,kindtuple(USECONDKIND,N,dims...)}(FFTW.plan_r2r!(x, USECONDKIND, dims...; kws...))
 end
 
-function plan_chebyshevutransform(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevUTransformPlan{T,1,false,(9,)}()
+        ChebyshevUTransformPlan{T,1,kindtuple(UFIRSTKIND,N,dims...),false,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
     else
-        ChebyshevUTransformPlan{T,1,false,(9,)}(FFTW.plan_r2r!(x, FFTW.RODFT10))
+        ChebyshevUTransformPlan{T,1,kindtuple(UFIRSTKIND,N,dims...)}(FFTW.plan_r2r(x, UFIRSTKIND, dims...; kws...))
     end
 end
-function plan_chebyshevutransform(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
-    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
-    ChebyshevUTransformPlan{T,2,false,(7,)}(FFTW.plan_r2r!(x, FFTW.RODFT00))
+function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
+    any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
+    ChebyshevUTransformPlan{T,2,kindtuple(USECONDKIND,N,dims...)}(FFTW.plan_r2r(x, USECONDKIND, dims...; kws...))
 end
 
-plan_chebyshevutransform!(x::AbstractVector) = plan_chebyshevutransform!(x, Val(1))
-plan_chebyshevutransform(x::AbstractVector) = plan_chebyshevutransform(x, Val(1))
+plan_chebyshevutransform!(x::AbstractArray, dims...; kws...) = plan_chebyshevutransform!(x, Val(1), dims...; kws...)
+plan_chebyshevutransform(x::AbstractArray, dims...; kws...) = plan_chebyshevutransform(x, Val(1), dims...; kws...)
 
 
-function *(P::ChebyshevUTransformPlan{T,1,true},x::AbstractVector{T}) where T
+@inline function _chebu1_prescale!(_, x::AbstractVector{T}) where T
     n = length(x)
-    assert_applicable(P, x)
-    n ≤ 1 && return x
-
     for k=1:n # sqrt(1-x_j^2) weight
         x[k] *= sinpi(one(T)/(2n) + (k-one(T))/n)/n
     end
+    x
+end
+
+@inline function _chebu1_postscale!(_, x::AbstractVector{T}) where T
+    n = length(x)
+    for k=1:n # sqrt(1-x_j^2) weight
+        x[k] /= sinpi(one(T)/(2n) + (k-one(T))/n)/n
+    end
+    x
+end
+
+function *(P::ChebyshevUTransformPlan{T,1,K,true}, x::AbstractVector{T}) where {T,K}
+    length(x) ≤ 1 && return x
+    _chebu1_prescale!(P.plan.region, x)
     P.plan * x
 end
 
-function *(P::ChebyshevUTransformPlan{T,2,true}, x::AbstractVector{T}) where T
+function mul!(y::AbstractVector{T}, P::ChebyshevUTransformPlan{T,1,K,false}, x::AbstractVector{T}) where {T,K}
     n = length(x)
-    assert_applicable(P, x)
-    n ≤ 1 && return x
+    length(x) ≤ 1 && return copyto!(y, x)
+    _chebu1_prescale!(P.plan.region, x)
+    _plan_mul!(y, P.plan, x)
+    _chebu1_postscale!(P.plan.region, x)
+    y
+end
 
+@inline function _chebu2_prescale!(_, x::AbstractVector{T}) where T
+    n = length(x)
     c = one(T)/ (n+1)
     for k=1:n # sqrt(1-x_j^2) weight
         x[k] *= sinpi(k*c)
     end
-    lmul!(c, P.plan * x)
+    x
 end
 
-chebyshevutransform!(x::AbstractVector{T}, kind=Val(1)) where {T<:fftwNumber} =
-    plan_chebyshevutransform!(x, kind)*x
+@inline function _chebu2_postscale!(_, x::AbstractVector{T}) where T
+    n = length(x)
+    c = one(T)/ (n+1)
+    @inbounds for k=1:n # sqrt(1-x_j^2) weight
+        x[k] /= sinpi(k*c)
+    end
+    x
+end
+
+function *(P::ChebyshevUTransformPlan{T,2,K,true}, x::AbstractVector{T}) where {T,K}
+    n = length(x)
+    n ≤ 1 && return x
+    _chebu2_prescale!(P.plan.region, x)
+    lmul!(one(T)/ (n+1), P.plan * x)
+end
+
+function mul!(y::AbstractVector{T}, P::ChebyshevUTransformPlan{T,2,K,false}, x::AbstractVector{T}) where {T,K}
+    n = length(x)
+    n ≤ 1 && return copyto!(y, x)
+    _chebu2_prescale!(P.plan.region, x)
+    _plan_mul!(y, P.plan, x)
+    _chebu2_postscale!(P.plan.region, x)
+    lmul!(one(T)/ (n+1), y)
+end
+
+*(P::ChebyshevUTransformPlan{T,kind,K,false,N}, x::AbstractArray{T,N}) where {T,kind,K,N} =
+    mul!(similar(x), P, x)
+
+chebyshevutransform!(x::AbstractVector{T}, dims...; kws...) where {T<:fftwNumber} =
+    plan_chebyshevutransform!(x, dims...; kws...)*x
 
 
 """
@@ -296,94 +440,113 @@ chebyshevutransform!(x::AbstractVector{T}, kind=Val(1)) where {T<:fftwNumber} =
 transforms from values on a Chebyshev grid of the first or second kind to Chebyshev
 coefficients of the 2nd kind (Chebyshev U expansion).
 """
-chebyshevutransform(x, kind=Val(1)) = chebyshevutransform!(Array(x), kind)
+chebyshevutransform(x, dims...; kws...) = plan_chebyshevutransform(x, dims...; kws...)*x
 
-*(P::ChebyshevUTransformPlan{T,k,false}, x::AbstractVector{T}) where {T,k} = ChebyshevUTransformPlan{T,k,true}(P)*Array(x)
 
 ## Inverse transforms take ChebyshevU coefficients and produce values at ChebyshevU points of the first and second kinds
+const IUFIRSTKIND = FFTW.RODFT01
 
-
-struct IChebyshevUTransformPlan{T,kind,inplace,P} <: ChebyshevPlan{T}
-    plan::FFTW.r2rFFTWPlan{T,P,true,1,UnitRange{Int}}
-    IChebyshevUTransformPlan{T,kind,inplace,P}(plan) where {T,kind,inplace,P} = new{T,kind,inplace,P}(plan)
-    IChebyshevUTransformPlan{T,kind,inplace,P}() where {T,kind,inplace,P} = new{T,kind,inplace,P}()
+struct IChebyshevUTransformPlan{T,kind,K,inplace,N,R} <: ChebyshevPlan{T}
+    plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}
+    IChebyshevUTransformPlan{T,kind,K,inplace,N,R}(plan) where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}(plan)
+    IChebyshevUTransformPlan{T,kind,K,inplace,N,R}() where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}()
 end
 
-IChebyshevUTransformPlan{T,kind,inplace}(F::FFTW.r2rFFTWPlan{T,P}) where {T,kind,inplace,P} = 
-    IChebyshevUTransformPlan{T,kind,inplace,P}(F)
-
-IChebyshevUTransformPlan{T,kind,true}(F::IChebyshevUTransformPlan{T,kind,false,P}) where {T,kind,P} = 
-    IChebyshevUTransformPlan{T,kind,true,P}(F.plan)
+IChebyshevUTransformPlan{T,kind,K}(F::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
+    IChebyshevUTransformPlan{T,kind,K,inplace,N,R}(F)
 
-function plan_ichebyshevutransform!(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+function plan_ichebyshevutransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevUTransformPlan{T,1,true,(8,)}()
+        IChebyshevUTransformPlan{T,1,kindtuple(IUFIRSTKIND,N,dims...),true,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
     else
-        IChebyshevUTransformPlan{T,1,true,(8,)}(FFTW.plan_r2r!(x, FFTW.RODFT01))
+        IChebyshevUTransformPlan{T,1,kindtuple(IUFIRSTKIND,N,dims...)}(FFTW.plan_r2r!(x, IUFIRSTKIND, dims...; kws...))
     end
 end
-function plan_ichebyshevutransform!(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
-    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
-    IChebyshevUTransformPlan{T,2,true,(7,)}(FFTW.plan_r2r!(x, FFTW.RODFT00))
+function plan_ichebyshevutransform!(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N} # in-place inverse Chebyshev-U plan, second-kind points
+    any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries")) # throws if any dimension has size ≤ 1
+    IChebyshevUTransformPlan{T,2,kindtuple(USECONDKIND,N,dims...)}(FFTW.plan_r2r!(x, USECONDKIND, dims...; kws...)) # forward dims/kws so the FFTW plan agrees with the kind tuple
 end
 
-function plan_ichebyshevutransform(x::AbstractVector{T}, ::Val{1}) where T<:fftwNumber
+function plan_ichebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevUTransformPlan{T,1,false,(8,)}()
+        IChebyshevUTransformPlan{T,1,kindtuple(IUFIRSTKIND,N,dims...),false,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
     else
-        IChebyshevUTransformPlan{T,1,false,(8,)}(FFTW.plan_r2r!(x, FFTW.RODFT01))
+        IChebyshevUTransformPlan{T,1,kindtuple(IUFIRSTKIND,N,dims...)}(FFTW.plan_r2r(x, IUFIRSTKIND, dims...; kws...))
     end
 end
-function plan_ichebyshevutransform(x::AbstractVector{T}, ::Val{2}) where T<:fftwNumber
-    length(x) ≤ 1 && throw(ArgumentError("Vector must contain at least 2 entries"))
-    IChebyshevUTransformPlan{T,2,false,(7,)}(FFTW.plan_r2r!(x, FFTW.RODFT00))
+function plan_ichebyshevutransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N} # out-of-place inverse Chebyshev-U plan, second-kind points
+    any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries")) # throws if any dimension has size ≤ 1
+    IChebyshevUTransformPlan{T,2,kindtuple(USECONDKIND,N,dims...)}(FFTW.plan_r2r(x, USECONDKIND, dims...; kws...)) # forward dims/kws so the FFTW plan agrees with the kind tuple
 end
 
-plan_ichebyshevutransform!(x::AbstractVector) = plan_ichebyshevutransform!(x, Val(1))
-plan_ichebyshevutransform(x::AbstractVector) = plan_ichebyshevutransform(x, Val(1))
 
+plan_ichebyshevutransform!(x::AbstractArray, dims...; kws...) = plan_ichebyshevutransform!(x, Val(1), dims...; kws...)
+plan_ichebyshevutransform(x::AbstractArray, dims...; kws...) = plan_ichebyshevutransform(x, Val(1), dims...; kws...)
 
-function *(P::IChebyshevUTransformPlan{T,1,true}, x::AbstractVector{T}) where T<:fftwNumber
+function _ichebyu1_postscale!(_, x::AbstractVector{T}) where T
     n = length(x)
-    assert_applicable(P, x)
-    n ≤ 1 && return x
-
-    x = P.plan * x
-    for k=1:n # sqrt(1-x_j^2) weight
+    @inbounds for k=1:n # sqrt(1-x_j^2) weight
         x[k] /= 2sinpi(one(T)/(2n) + (k-one(T))/n)
     end
     x
 end
+function *(P::IChebyshevUTransformPlan{T,1,K,true}, x::AbstractVector{T}) where {T<:fftwNumber,K}
+    n = length(x)
+    n ≤ 1 && return x
 
+    x = P.plan * x
+    _ichebyu1_postscale!(P.plan.region, x)
+end
 
-
-function *(P::IChebyshevUTransformPlan{T,2,true}, x::AbstractVector{T}) where T<:fftwNumber
+function mul!(y::AbstractVector{T}, P::IChebyshevUTransformPlan{T,1,K,false}, x::AbstractVector{T}) where {T<:fftwNumber,K} # out-of-place: writes the result into y
     n = length(x)
-    assert_applicable(P, x)
-    n ≤ 1 && return x
+    length(y) == n || throw(DimensionMismatch("output must match dimension"))
+    n ≤ 1 && return copyto!(y, x) # short input: copy through and return the output, as the forward mul! methods do
 
+    _plan_mul!(y, P.plan, x)
+    _ichebyu1_postscale!(P.plan.region, y)
+end
+
+function _ichebu2_rescale!(_, x::AbstractVector{T}) where T
+    n = length(x)
     c = one(T)/ (n+1)
-    lmul!((n+1)/(2n+2*one(T)), x)
-    x = P.plan * x
     for k=1:n # sqrt(1-x_j^2) weight
         x[k] /= sinpi(k*c)
     end
+    ldiv!(2, x)
     x
 end
 
-ichebyshevutransform!(x::AbstractVector{T}, kind=Val(1)) where {T<:fftwNumber} =
-    plan_ichebyshevutransform!(x, kind)*x
+function *(P::IChebyshevUTransformPlan{T,2,K,true}, x::AbstractVector{T}) where {T<:fftwNumber,K}
+    n = length(x)
+    n ≤ 1 && return x
+
+    x = P.plan * x
+    _ichebu2_rescale!(P.plan.region, x)
+end
+
+function mul!(y::AbstractVector{T}, P::IChebyshevUTransformPlan{T,2,K,false}, x::AbstractVector{T}) where {T<:fftwNumber,K} # out-of-place: writes the result into y
+    n = length(x)
+    length(y) == n || throw(DimensionMismatch("output must match dimension"))
+    n ≤ 1 && return copyto!(y, x) # short input: copy through and return the output, as the forward mul! methods do
+
+    _plan_mul!(y, P.plan, x)
+    _ichebu2_rescale!(P.plan.region, y)
+end
+
+ichebyshevutransform!(x::AbstractVector{T}, dims...; kwds...) where {T<:fftwNumber} =
+    plan_ichebyshevutransform!(x, dims...; kwds...)*x
 
-ichebyshevutransform(x, kind=Val(1)) = ichebyshevutransform!(Array(x), kind)
+ichebyshevutransform(x, dims...; kwds...) = plan_ichebyshevutransform(x, dims...; kwds...)*x
 
-*(P::IChebyshevUTransformPlan{T,k,false},x::AbstractVector{T}) where {T,k} = 
-    IChebyshevUTransformPlan{T,k,true}(P)*Array(x)
+*(P::IChebyshevUTransformPlan{T,k,K,false,N}, x::AbstractArray{T,N}) where {T,k,K,N} =
+    mul!(similar(x), P, x)
 
 
 ## Code generation for integer inputs
 
 for func in (:chebyshevtransform,:ichebyshevtransform,:chebyshevutransform,:ichebyshevutransform)
-    @eval $func(x::AbstractVector{T}, kind=Val(1)) where {T<:Integer} = $func(convert(AbstractVector{Float64},x), kind)
+    @eval $func(x::AbstractVector{T}, dims...; kwds...) where {T<:Integer} = $func(convert(AbstractVector{Float64},x), dims...; kwds...)
 end
 
 
@@ -437,21 +600,23 @@ chebyshevpoints(n::Integer, kind=Val(1)) = chebyshevpoints(Float64, n, kind)
 
 ###
 # BigFloat
-# Use `Nothing` and fall back too FFT
+# Use `Nothing` and fall back to FFT
 ###
 
-plan_chebyshevtransform(x::AbstractVector{T}, ::Val{kind}) where {T,kind} =
-    ChebyshevTransformPlan{T,kind,false,Nothing}()
-plan_ichebyshevtransform(x::AbstractVector{T}, ::Val{kind}) where {T,kind} =
-    IChebyshevTransformPlan{T,kind,false,Nothing}()
 
-plan_chebyshevtransform!(x::AbstractVector{T}, ::Val{kind}) where {T,kind} =
-    ChebyshevTransformPlan{T,kind,true,Nothing}()
-plan_ichebyshevtransform!(x::AbstractVector{T}, ::Val{kind}) where {T,kind} =
-    IChebyshevTransformPlan{T,kind,true,Nothing}()
+plan_chebyshevtransform(x::AbstractArray{T,N}, ::Val{kind}, dims...; kws...) where {T,N,kind} =
+    ChebyshevTransformPlan{T,kind,Nothing,false,N,UnitRange{Int}}()
+plan_ichebyshevtransform(x::AbstractArray{T,N}, ::Val{kind}, dims...; kws...) where {T,N,kind} =
+    IChebyshevTransformPlan{T,kind,Nothing,false,N,UnitRange{Int}}()
+
+plan_chebyshevtransform!(x::AbstractArray{T,N}, ::Val{kind}, dims...; kws...) where {T,N,kind} =
+    ChebyshevTransformPlan{T,kind,Nothing,true,N,UnitRange{Int}}()
+plan_ichebyshevtransform!(x::AbstractArray{T,N}, ::Val{kind}, dims...; kws...) where {T,N,kind} =
+    IChebyshevTransformPlan{T,kind,Nothing,true,N,UnitRange{Int}}()
+
 
 #following Chebfun's @Chebtech1/vals2coeffs.m and @Chebtech2/vals2coeffs.m
-function *(P::ChebyshevTransformPlan{T,1,false,Nothing}, x::AbstractVector{T}) where T
+function *(P::ChebyshevTransformPlan{T,1,Nothing,false}, x::AbstractVector{T}) where T
     n = length(x)
     if n == 1
         x
@@ -465,7 +630,7 @@ function *(P::ChebyshevTransformPlan{T,1,false,Nothing}, x::AbstractVector{T}) w
 end
 
 
-# function *(P::ChebyshevTransformPlan{T,2,false,Nothing}, x::AbstractVector{T}) where T
+# function *(P::ChebyshevTransformPlan{T,2,Nothing,false}, x::AbstractVector{T}) where T
 #     n = length(x)
 #     if n == 1
 #         x
@@ -478,14 +643,14 @@ end
 # end
 
 
-*(P::ChebyshevTransformPlan{T,1,true,Nothing}, x::AbstractVector{T}) where T =
-    copyto!(x, ChebyshevTransformPlan{T,1,false,Nothing}() * x)
+*(P::ChebyshevTransformPlan{T,1,Nothing,true,N,R}, x::AbstractVector{T}) where {T,N,R} =
+    copyto!(x, ChebyshevTransformPlan{T,1,Nothing,false,N,R}() * x)
 # *(P::ChebyshevTransformPlan{T,2,true,Nothing}, x::AbstractVector{T}) where T =
 #     copyto!(x, ChebyshevTransformPlan{T,2,false,Nothing}() * x)
 
 
 #following Chebfun's @Chebtech1/vals2coeffs.m and @Chebtech2/vals2coeffs.m
-function *(P::IChebyshevTransformPlan{T,1,false,Nothing}, x::AbstractVector{T}) where T
+function *(P::IChebyshevTransformPlan{T,1,Nothing,false}, x::AbstractVector{T}) where T
     n = length(x)
     if n == 1
         x
@@ -498,7 +663,7 @@ function *(P::IChebyshevTransformPlan{T,1,false,Nothing}, x::AbstractVector{T})
     end
 end
 
-# function *(P::IChebyshevTransformPlan{T,2,true,Nothing}, x::AbstractVector{T}) where T
+# function *(P::IChebyshevTransformPlan{T,2,Nothing,true}, x::AbstractVector{T}) where T
 #     n = length(x)
 #     if n == 1
 #         x
@@ -511,7 +676,7 @@ end
 #     end
 # end
 
-*(P::IChebyshevTransformPlan{T,1,true,Nothing}, x::AbstractVector{T}) where T =
-    copyto!(x, IChebyshevTransformPlan{T,1,false,Nothing}() * x)
-# *(P::IChebyshevTransformPlan{T,2,false,Nothing}, x::AbstractVector{T}) where T =
-#     IChebyshevTransformPlan{T,2,true,Nothing}() * copy(x)
\ No newline at end of file
+*(P::IChebyshevTransformPlan{T,1,Nothing,true,N,R}, x::AbstractVector{T}) where {T,N,R} =
+    copyto!(x, IChebyshevTransformPlan{T,1,Nothing,false,N,R}() * x)
+# *(P::IChebyshevTransformPlan{T,SECONDKIND,false,Nothing}, x::AbstractVector{T}) where T =
+#     IChebyshevTransformPlan{T,SECONDKIND,true,Nothing}() * copy(x)
\ No newline at end of file
diff --git a/src/clenshaw.jl b/src/clenshaw.jl
index 297df736..e57d8952 100644
--- a/src/clenshaw.jl
+++ b/src/clenshaw.jl
@@ -49,10 +49,20 @@ evaluates the orthogonal polynomial expansion with coefficients `c` at points `x
 where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
 as defined in DLMF,
 overwriting `x` with the results.
+
+If `c` is a matrix this treats each column as a separate vector of coefficients, returning a vector
+if `x` is a number and a matrix if `x` is a vector.
 """
 clenshaw!(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) =
     clenshaw!(c, A, B, C, x, Ones{eltype(x)}(length(x)), x)
 
+clenshaw!(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number, f::AbstractVector) =
+    clenshaw!(c, A, B, C, x, one(eltype(x)), f)
+
+
+clenshaw!(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector, f::AbstractMatrix) =
+    clenshaw!(c, A, B, C, x, Ones{eltype(x)}(length(x)), f)
+
 
 """
 clenshaw!(c, A, B, C, x, ϕ₀, f)
@@ -67,6 +77,22 @@ function clenshaw!(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::A
 end
 
 
+function clenshaw!(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number, ϕ₀::Number, f::AbstractVector) # one expansion per column of c, evaluated at the scalar x
+    size(c,2) == length(f) || throw(DimensionMismatch("coeffients size and output length must match"))
+    @inbounds for j in axes(c,2) # column j of c is the coefficient vector for output entry f[j]
+        f[j] = ϕ₀ * clenshaw(view(c,:,j), A, B, C, x) # scale the vector-coefficient result by ϕ₀
+    end
+    f # the filled output vector is returned
+end
+
+function clenshaw!(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector, ϕ₀::AbstractVector, f::AbstractMatrix) # one expansion per column of c, evaluated at every point of x
+    (size(x,1),size(c,2)) == size(f) || throw(DimensionMismatch("coeffients size and output length must match")) # f must be length(x) × size(c,2)
+    @inbounds for j in axes(c,2) # column j of c fills column j of f
+        clenshaw!(view(c,:,j), A, B, C, x, ϕ₀, view(f,:,j)) # delegate to the vector-coefficient method, writing into the view of f
+    end
+    f # the filled output matrix is returned
+end
+
 Base.@propagate_inbounds _clenshaw_next(n, A, B, C, x, c, bn1, bn2) = muladd(muladd(A[n],x,B[n]), bn1, muladd(-C[n+1],bn2,c[n]))
 Base.@propagate_inbounds _clenshaw_next(n, A, ::Zeros, C, x, c, bn1, bn2) = muladd(A[n]*x, bn1, muladd(-C[n+1],bn2,c[n]))
 # Chebyshev U
@@ -83,6 +109,9 @@ evaluates the orthogonal polynomial expansion with coefficients `c` at points `x
 where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
 as defined in DLMF.
 `x` may also be a single `Number`.
+
+If `c` is a matrix this treats each column as a separate vector of coefficients, returning a vector
+if `x` is a number and a matrix if `x` is a vector.
 """
 
 function clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number)
@@ -106,6 +135,16 @@ end
 clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) =
     clenshaw!(c, A, B, C, copy(x))
 
+function clenshaw(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number)
+    T = promote_type(eltype(c),eltype(A),eltype(B),eltype(C),typeof(x))
+    clenshaw!(c, A, B, C, x, Vector{T}(undef, size(c,2)))
+end
+
+function clenshaw(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) # allocating variant: returns a length(x) × size(c,2) matrix
+    T = promote_type(eltype(c),eltype(A),eltype(B),eltype(C),eltype(x)) # promote element types; typeof(x) on a vector would widen T to Any
+    clenshaw!(c, A, B, C, x, Matrix{T}(undef, size(x,1), size(c,2)))
+end
+
 ###
 # Chebyshev T special cases
 ###
@@ -155,4 +194,22 @@ function clenshaw(c::AbstractVector, x::Number)
     end
 end
 
+function clenshaw!(c::AbstractMatrix, x::Number, f::AbstractVector) # one expansion per column of c, evaluated at the scalar x
+    size(c,2) == length(f) || throw(DimensionMismatch("coeffients size and output length must match"))
+    @inbounds for j in axes(c,2) # column j of c is the coefficient vector for output entry f[j]
+        f[j] = clenshaw(view(c,:,j), x)
+    end
+    f # the filled output vector is returned
+end
+
+function clenshaw!(c::AbstractMatrix, x::AbstractVector, f::AbstractMatrix) # one expansion per column of c, evaluated at every point of x
+    (size(x,1),size(c,2)) == size(f) || throw(DimensionMismatch("coeffients size and output length must match")) # f must be length(x) × size(c,2)
+    @inbounds for j in axes(c,2) # column j of c fills column j of f
+        clenshaw!(view(c,:,j), x, view(f,:,j)) # delegate to the vector-coefficient method, writing into the view of f
+    end
+    f # the filled output matrix is returned
+end
+
 clenshaw(c::AbstractVector, x::AbstractVector) = clenshaw!(c, copy(x))
+clenshaw(c::AbstractMatrix, x::Number) = clenshaw!(c, x, Vector{promote_type(eltype(c),typeof(x))}(undef, size(c,2)))
+clenshaw(c::AbstractMatrix, x::AbstractVector) = clenshaw!(c, x, Matrix{promote_type(eltype(c),eltype(x))}(undef, size(x,1), size(c,2)))
\ No newline at end of file
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 0383df2c..a0e188ad 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -113,12 +113,12 @@ using FastTransforms, Test
             f̃ = copy(f)
             f̄ = copy(f̌)
             P = @inferred(plan_chebyshevutransform(f))
-            @test P*f == f̌
+            @test P*f ≈ f̌
             @test f == f̃
             @test_throws ArgumentError P * T[1,2]
             P = @inferred(plan_chebyshevutransform!(f))
-            @test P*f == f̌
-            @test f == f̌
+            @test P*f ≈ f̌
+            @test f ≈ f̌
             @test_throws ArgumentError P * T[1,2]
             Pi = @inferred(plan_ichebyshevutransform(f̌))
             @test Pi*f̌ ≈ f̃
@@ -135,7 +135,6 @@ using FastTransforms, Test
             @test ichebyshevutransform(T[]) == T[]
         end
     end
-
     @testset "Chebyshev second kind points <-> second kind coefficients" begin
         for T in (Float32, Float64, ComplexF32, ComplexF64)
             n = 20
@@ -150,16 +149,16 @@ using FastTransforms, Test
             f̃ = copy(f)
             f̄ = copy(f̌)
             P = @inferred(plan_chebyshevutransform(f, Val(2)))
-            @test @inferred(P*f) == f̌
-            @test f == f̃
+            @test @inferred(P*f) ≈ f̌
+            @test f ≈ f̃
             @test_throws ArgumentError P * T[1,2]
             P = @inferred(plan_chebyshevutransform!(f, Val(2)))
-            @test @inferred(P*f) == f̌
-            @test f == f̌
+            @test @inferred(P*f) ≈ f̌
+            @test f ≈ f̌
             @test_throws ArgumentError P * T[1,2]
             Pi = @inferred(plan_ichebyshevutransform(f̌, Val(2)))
             @test @inferred(Pi*f̌) ≈ f̃
-            @test f̌ == f̄
+            @test f̌ ≈ f̄
             @test_throws ArgumentError Pi * T[1,2]
             Pi = @inferred(plan_ichebyshevutransform!(f̌, Val(2)))
             @test @inferred(Pi*f̌) ≈ f̃
@@ -174,34 +173,34 @@ using FastTransforms, Test
     end
 
     @testset "matrix" begin
+        X = randn(4,5)
+        @testset "chebyshevtransform" begin
+            @test @inferred(chebyshevtransform(X,1)) ≈ @inferred(chebyshevtransform!(copy(X),1)) ≈ hcat(chebyshevtransform.([X[:,k] for k=axes(X,2)])...)
+            @test chebyshevtransform(X,2) ≈ chebyshevtransform!(copy(X),2) ≈ hcat(chebyshevtransform.([X[k,:] for k=axes(X,1)])...)'
+            @test @inferred(chebyshevtransform(X,Val(2),1)) ≈ @inferred(chebyshevtransform!(copy(X),Val(2),1)) ≈ hcat(chebyshevtransform.([X[:,k] for k=axes(X,2)],Val(2))...)
+            @test chebyshevtransform(X,Val(2),2) ≈ chebyshevtransform!(copy(X),Val(2),2) ≈ hcat(chebyshevtransform.([X[k,:] for k=axes(X,1)],Val(2))...)'
+
+            @test @inferred(chebyshevtransform(X)) ≈ @inferred(chebyshevtransform!(copy(X))) ≈ chebyshevtransform(chebyshevtransform(X,1),2)
+            @test @inferred(chebyshevtransform(X,Val(2))) ≈ @inferred(chebyshevtransform!(copy(X),Val(2))) ≈ chebyshevtransform(chebyshevtransform(X,Val(2),1),Val(2),2)
+        end
+
+        @testset "ichebyshevtransform" begin
+            @test @inferred(ichebyshevtransform(X,1)) ≈ @inferred(ichebyshevtransform!(copy(X),1)) ≈ hcat(ichebyshevtransform.([X[:,k] for k=axes(X,2)])...)
+            @test ichebyshevtransform(X,2) ≈ ichebyshevtransform!(copy(X),2) ≈ hcat(ichebyshevtransform.([X[k,:] for k=axes(X,1)])...)'
+            @test @inferred(ichebyshevtransform(X,Val(2),1)) ≈ @inferred(ichebyshevtransform!(copy(X),Val(2),1)) ≈ hcat(ichebyshevtransform.([X[:,k] for k=axes(X,2)],Val(2))...)
+            @test ichebyshevtransform(X,Val(2),2) ≈ ichebyshevtransform!(copy(X),Val(2),2) ≈ hcat(ichebyshevtransform.([X[k,:] for k=axes(X,1)],Val(2))...)'
+
+            @test @inferred(ichebyshevtransform(X)) ≈ @inferred(ichebyshevtransform!(copy(X))) ≈ ichebyshevtransform(ichebyshevtransform(X,1),2)
+            @test @inferred(ichebyshevtransform(X,Val(2))) ≈ @inferred(ichebyshevtransform!(copy(X),Val(2))) ≈ ichebyshevtransform(ichebyshevtransform(X,Val(2),1),Val(2),2)            
+
+            @test ichebyshevtransform(chebyshevtransform(X)) ≈ X
+            @test chebyshevtransform(ichebyshevtransform(X)) ≈ X
+        end
+
         X = randn(1,1)
         @test chebyshevtransform!(copy(X), Val(1)) == ichebyshevtransform!(copy(X), Val(1)) == X
         @test_throws ArgumentError chebyshevtransform!(copy(X), Val(2))
         @test_throws ArgumentError ichebyshevtransform!(copy(X), Val(2))
-
-        X = randn(10,11)
-        
-        # manual 2D Chebyshev
-        X̌ = copy(X)
-        for j in axes(X̌,2)
-            chebyshevtransform!(view(X̌,:,j))
-        end
-        for k in axes(X̌,1)
-            chebyshevtransform!(view(X̌,k,:))
-        end
-        @test chebyshevtransform!(copy(X), Val(1)) ≈ X̌
-        @test ichebyshevtransform!(copy(X̌), Val(1)) ≈ X
-
-        # manual 2D Chebyshev
-        X̌ = copy(X)
-        for j in axes(X̌,2)
-            chebyshevtransform!(view(X̌,:,j), Val(2))
-        end
-        for k in axes(X̌,1)
-            chebyshevtransform!(view(X̌,k,:), Val(2))
-        end
-        @test chebyshevtransform!(copy(X), Val(2)) ≈ X̌
-        @test ichebyshevtransform!(copy(X̌), Val(2)) ≈ X
     end
 
     @testset "Integer" begin
diff --git a/test/clenshawtests.jl b/test/clenshawtests.jl
index aff2141f..14485c38 100644
--- a/test/clenshawtests.jl
+++ b/test/clenshawtests.jl
@@ -30,6 +30,12 @@ import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrenc
                 # modifies x and xv
                 @test clenshaw!(cv2, xv) == xv == x == clenshaw([1,3], elty[1,0,0.1])
             end
+
+            @testset "matrix coefficients" begin
+                c = [1 2; 3 4; 5 6]
+                @test clenshaw(c,0.1) ≈ [clenshaw(c[:,1],0.1), clenshaw(c[:,2],0.1)]
+                @test clenshaw(c,[0.1,0.2]) ≈ [clenshaw(c[:,1], 0.1) clenshaw(c[:,2], 0.1); clenshaw(c[:,1], 0.2) clenshaw(c[:,2], 0.2)] 
+            end
         end
     end
 
@@ -46,6 +52,12 @@ import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrenc
         c = [1,2,3]
         @test c'forwardrecurrence(3, A, B, C, 0.1) ≈ clenshaw([1,2,3], A, B, C, 0.1) ≈ 
             1 + (2sin(2acos(0.1)) + 3sin(3acos(0.1)))/sqrt(1-0.1^2)
+
+        @testset "matrix coefficients" begin
+            c = [1 2; 3 4; 5 6]
+            @test clenshaw(c,A,B,C,0.1) ≈ [clenshaw(c[:,1],A,B,C,0.1), clenshaw(c[:,2],A,B,C,0.1)]
+            @test clenshaw(c,A,B,C,[0.1,0.2]) ≈ [clenshaw(c[:,1], A,B,C,0.1) clenshaw(c[:,2], A,B,C,0.1); clenshaw(c[:,1], A,B,C,0.2) clenshaw(c[:,2], A,B,C,0.2)] 
+        end            
     end
 
     @testset "Chebyshev-as-general" begin

From fed406e7fac4c63770571c3c5a0f53fabd5c5078 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 2 Dec 2021 15:21:28 +0000
Subject: [PATCH 086/222] CompatHelper: bump compat for "SpecialFunctions" to
 "2" (#156)

* CompatHelper: bump compat for "SpecialFunctions" to "2"

* v0.13.1

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Sheehan Olver <solver@mac.com>
---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 7fa3cf03..ad8c02a9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.13.0"
+version = "0.13.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -28,6 +28,6 @@ FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.5.1"
 FillArrays = "0.9, 0.10, 0.11, 0.12"
 Reexport = "0.2, 1.0"
-SpecialFunctions = "0.10, 1"
+SpecialFunctions = "0.10, 1, 2"
 ToeplitzMatrices = "0.6, 0.7"
 julia = "1.6"

From f1c55dbad8997cacff585c871553ba8697255533 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Sun, 5 Dec 2021 21:23:14 -0600
Subject: [PATCH 087/222] fix #159

---
 Project.toml | 2 +-
 src/nufft.jl | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index ad8c02a9..368929e1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.13.1"
+version = "0.13.2"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/nufft.jl b/src/nufft.jl
index faef09f7..2e49f37d 100644
--- a/src/nufft.jl
+++ b/src/nufft.jl
@@ -187,6 +187,9 @@ mul_for_col_J!(y::AbstractVecOrMat{T}, A::AbstractMatrix{T}, x::AbstractVecOrMat
 function mul_for_col_J!(y::AbstractVecOrMat{T}, A::AbstractMatrix{T}, x::AbstractVecOrMat{T}, istart::Int, jstart::Int, INCX::Int, INCY::Int) where T
     m, n = size(A)
     ishift, jshift = istart-INCY, jstart-INCX
+    @inbounds for i = 1:m
+        y[ishift+i*INCY] = zero(T)
+    end
     @inbounds for j = 1:n
         xj = x[jshift+j*INCX]
         for i = 1:m

From 04a07df7bf0d7f0fc84f24b1d082420fb38c9192 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 5 Jan 2022 20:47:54 -0700
Subject: [PATCH 088/222] update for jll and try to fix doc plots

---
 .github/workflows/ci.yml | 5 ++---
 Project.toml             | 6 +++---
 docs/Project.toml        | 1 +
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6c2b3b92..169112d7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,8 +9,7 @@ jobs:
     strategy:
       matrix:
         version:
-          - '1.6'
-          - '^1.7.0-0'
+          - '1.7'
         os:
           - ubuntu-latest
           - macOS-latest
@@ -51,7 +50,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: julia-actions/setup-julia@v1
         with:
-          version: '1.5'
+          version: '1.7'
       - run: |
           julia --project=docs -e '
             using Pkg
diff --git a/Project.toml b/Project.toml
index 368929e1..bee02224 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.13.2"
+version = "0.13.3"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -25,9 +25,9 @@ BinaryProvider = "0.5"
 DSP = "0.6, 0.7"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.5.1"
+FastTransforms_jll = "0.5.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
 ToeplitzMatrices = "0.6, 0.7"
-julia = "1.6"
+julia = "1.7"
diff --git a/docs/Project.toml b/docs/Project.toml
index 313c185c..cabe3c60 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -7,3 +7,4 @@ Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 
 [compat]
 Documenter = "~0.24"
+Literate = "~2.8"

From c2e293651135bca96f1a5724b9d6553123c84517 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 5 Jan 2022 21:26:52 -0700
Subject: [PATCH 089/222] Goodbye, Travis!

---
 .travis.yml | 27 ---------------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 83adbc61..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-# Documentation: http://docs.travis-ci.com/user/languages/julia/
-language: julia
-os:
-  - freebsd
-  - linux
-arch:
-  - x64
-  - x86
-  - arm64
-julia:
-  - 1.3
-  - 1.5
-  - nightly
-jobs:
-  exclude:
-    - os: freebsd
-      arch: x86
-    - os: freebsd
-      arch: arm64
-  allow_failures:
-    - julia: nightly
-    - arch: arm64
-notifications:
-  email: false
-cache:
-  directories:
-    - $HOME/.julia/artifacts

From b150587c2a5582057055b9490923d8fe2720c696 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 5 Jan 2022 21:27:51 -0700
Subject: [PATCH 090/222] remove badge

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d0058f7c..00302bba 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # FastTransforms.jl
 
-[![Build Status](https://github.com/JuliaApproximation/FastTransforms.jl/workflows/CI/badge.svg)](https://github.com/JuliaApproximation/FastTransforms.jl/actions?query=workflow%3ACI) [![Travis](https://travis-ci.com/JuliaApproximation/FastTransforms.jl.svg?branch=master)](https://travis-ci.com/JuliaApproximation/FastTransforms.jl) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/dev)
+[![Build Status](https://github.com/JuliaApproximation/FastTransforms.jl/workflows/CI/badge.svg)](https://github.com/JuliaApproximation/FastTransforms.jl/actions?query=workflow%3ACI) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/dev)
 
 `FastTransforms.jl` allows the user to conveniently work with orthogonal polynomials with degrees well into the millions.
 

From 6bad0ea0e8a0e9ce83774414719543fdbd1feefc Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 2 Feb 2022 12:22:00 -0600
Subject: [PATCH 091/222] close #162

---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index bee02224..a6cbab8c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.13.3"
+version = "0.13.4"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -25,7 +25,7 @@ BinaryProvider = "0.5"
 DSP = "0.6, 0.7"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.5.2"
+FastTransforms_jll = "0.5.3"
 FillArrays = "0.9, 0.10, 0.11, 0.12"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"

From 7ee52398760809d5b19c44e68c12c3bdf6e9d84d Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 2 Feb 2022 14:39:44 -0600
Subject: [PATCH 092/222] Close #163 & #141

Strictly speaking, this expands the API rather than changing it, so this is fine with respect to versioning.

The multi-dimensional conveniences are added. I suppose it's possible to add conveniences for ultra2cheb, but I'll let someone file an issue before deciding what those should be.
---
 Project.toml             |  4 ++--
 src/libfasttransforms.jl | 33 ++++++++++++++++++++++-----------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/Project.toml b/Project.toml
index a6cbab8c..9f79f5d6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.13.4"
+version = "0.13.5"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -25,7 +25,7 @@ BinaryProvider = "0.5"
 DSP = "0.6, 0.7"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.5.3"
+FastTransforms_jll = "0.5.4"
 FillArrays = "0.9, 0.10, 0.11, 0.12"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 9a073e29..0391095c 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -619,6 +619,17 @@ function plan_spinsph2fourier(::Type{Complex{Float64}}, n::Integer, s::Integer)
     return FTPlan{Complex{Float64}, 2, SPINSPHERE}(plan, n)
 end
 
+plan_disk2cxf(::Type{Float64}, n::Integer, α) = plan_disk2cxf(Float64, n, α, 0)
+plan_disk2cxf(::Type{Float64}, n::Integer) = plan_disk2cxf(Float64, n, 0)
+plan_rectdisk2cheb(::Type{Float64}, n::Integer) = plan_rectdisk2cheb(Float64, n, 0)
+plan_tri2cheb(::Type{Float64}, n::Integer, α, β) = plan_tri2cheb(Float64, n, α, β, 0)
+plan_tri2cheb(::Type{Float64}, n::Integer, α) = plan_tri2cheb(Float64, n, α, 0)
+plan_tri2cheb(::Type{Float64}, n::Integer) = plan_tri2cheb(Float64, n, 0)
+plan_tet2cheb(::Type{Float64}, n::Integer, α, β, γ) = plan_tet2cheb(Float64, n, α, β, γ, 0)
+plan_tet2cheb(::Type{Float64}, n::Integer, α, β) = plan_tet2cheb(Float64, n, α, β, 0)
+plan_tet2cheb(::Type{Float64}, n::Integer, α) = plan_tet2cheb(Float64, n, α, 0)
+plan_tet2cheb(::Type{Float64}, n::Integer) = plan_tet2cheb(Float64, n, 0)
+
 for (fJ, fadJ, fC, fE, K) in ((:plan_sph_synthesis, :plan_sph_analysis, :ft_plan_sph_synthesis, :ft_execute_sph_synthesis, SPHERESYNTHESIS),
                               (:plan_sph_analysis, :plan_sph_synthesis, :ft_plan_sph_analysis, :ft_execute_sph_analysis, SPHEREANALYSIS),
                               (:plan_sphv_synthesis, :plan_sphv_analysis, :ft_plan_sphv_synthesis, :ft_execute_sphv_synthesis, SPHEREVSYNTHESIS),
@@ -630,10 +641,10 @@ for (fJ, fadJ, fC, fE, K) in ((:plan_sph_synthesis, :plan_sph_analysis, :ft_plan
                               (:plan_tri_synthesis, :plan_tri_analysis, :ft_plan_tri_synthesis, :ft_execute_tri_synthesis, TRIANGLESYNTHESIS),
                               (:plan_tri_analysis, :plan_tri_synthesis, :ft_plan_tri_analysis, :ft_execute_tri_analysis, TRIANGLEANALYSIS))
     @eval begin
-        $fJ(x::Matrix{T}) where T = $fJ(T, size(x, 1), size(x, 2))
-        $fJ(::Type{Complex{T}}, x...) where T <: Real = $fJ(T, x...)
-        function $fJ(::Type{Float64}, n::Integer, m::Integer)
-            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint), n, m)
+        $fJ(x::Matrix{T}; y...) where T = $fJ(T, size(x, 1), size(x, 2); y...)
+        $fJ(::Type{Complex{T}}, x...; y...) where T <: Real = $fJ(T, x...; y...)
+        function $fJ(::Type{Float64}, n::Integer, m::Integer; flags::Integer=FFTW.ESTIMATE)
+            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cuint), n, m, flags)
             return FTPlan{Float64, 2, $K}(plan, n, m)
         end
         adjoint(p::FTPlan{T, 2, $K}) where T = AdjointFTPlan(p, $fadJ(T, p.n, p.m))
@@ -659,10 +670,10 @@ end
 for (fJ, fadJ, fC, fE, K) in ((:plan_tet_synthesis, :plan_tet_analysis, :ft_plan_tet_synthesis, :ft_execute_tet_synthesis, TETRAHEDRONSYNTHESIS),
                               (:plan_tet_analysis, :plan_tet_synthesis, :ft_plan_tet_analysis, :ft_execute_tet_analysis, TETRAHEDRONANALYSIS))
     @eval begin
-        $fJ(x::Array{T, 3}) where T = $fJ(T, size(x, 1), size(x, 2), size(x, 3))
-        $fJ(::Type{Complex{T}}, x...) where T <: Real = $fJ(T, x...)
-        function $fJ(::Type{Float64}, n::Integer, l::Integer, m::Integer)
-            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), n, l, m)
+        $fJ(x::Array{T, 3}; y...) where T = $fJ(T, size(x, 1), size(x, 2), size(x, 3); y...)
+        $fJ(::Type{Complex{T}}, x...; y...) where T <: Real = $fJ(T, x...; y...)
+        function $fJ(::Type{Float64}, n::Integer, l::Integer, m::Integer; flags::Integer=FFTW.ESTIMATE)
+            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint, Cuint), n, l, m, flags)
             return FTPlan{Float64, 3, $K}(plan, n, l, m)
         end
         adjoint(p::FTPlan{T, 3, $K}) where T = AdjointFTPlan(p, $fadJ(T, p.n, p.l, p.m))
@@ -688,9 +699,9 @@ end
 for (fJ, fadJ, fC, fE, K) in ((:plan_spinsph_synthesis, :plan_spinsph_analysis, :ft_plan_spinsph_synthesis, :ft_execute_spinsph_synthesis, SPINSPHERESYNTHESIS),
                               (:plan_spinsph_analysis, :plan_spinsph_synthesis, :ft_plan_spinsph_analysis, :ft_execute_spinsph_analysis, SPINSPHEREANALYSIS))
     @eval begin
-        $fJ(x::Matrix{T}, s::Integer) where T = $fJ(T, size(x, 1), size(x, 2), s)
-        function $fJ(::Type{Complex{Float64}}, n::Integer, m::Integer, s::Integer)
-            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), n, m, s)
+        $fJ(x::Matrix{T}, s::Integer; y...) where T = $fJ(T, size(x, 1), size(x, 2), s; y...)
+        function $fJ(::Type{Complex{Float64}}, n::Integer, m::Integer, s::Integer; flags::Integer=FFTW.ESTIMATE)
+            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint, Cuint), n, m, s, flags)
             return FTPlan{Complex{Float64}, 2, $K}(plan, n, m)
         end
         get_spin(p::FTPlan{T, 2, $K}) where T = ccall((:ft_get_spin_spinsphere_fftw_plan, libfasttransforms), Cint, (Ptr{ft_plan_struct},), p)

From f2c970428de19d993c864dfc57b77718a21a439d Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 17 Feb 2022 11:57:34 -0600
Subject: [PATCH 093/222] CompatHelper: bump compat for "FillArrays" to "0.13"
 (#164)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 9f79f5d6..f6e86d85 100644
--- a/Project.toml
+++ b/Project.toml
@@ -26,7 +26,7 @@ DSP = "0.6, 0.7"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.5.4"
-FillArrays = "0.9, 0.10, 0.11, 0.12"
+FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
 ToeplitzMatrices = "0.6, 0.7"

From e55621988ca91d97cac9f98810a73765441da22f Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Thu, 17 Feb 2022 12:09:30 -0600
Subject: [PATCH 094/222] bump version for FillArrays

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index f6e86d85..785af4ed 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.13.5"
+version = "0.13.6"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From 567e3fe44c13ae5d393113b2953d6e8659a4b482 Mon Sep 17 00:00:00 2001
From: Rik Huijzer <t.h.huijzer@rug.nl>
Date: Wed, 23 Feb 2022 07:05:35 +0100
Subject: [PATCH 095/222] Use `pkgdir` (#165)

---
 examples/disk.jl     | 2 +-
 examples/sphere.jl   | 2 +-
 examples/triangle.jl | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/disk.jl b/examples/disk.jl
index cd83a9a5..19db1731 100644
--- a/examples/disk.jl
+++ b/examples/disk.jl
@@ -20,7 +20,7 @@
 # [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
 
 using FastTransforms, LinearAlgebra, Plots
-const GENFIGS = joinpath(dirname(dirname(pathof(FastTransforms))), "docs/src/generated")
+const GENFIGS = joinpath(pkgdir(FastTransforms), "docs/src/generated")
 !isdir(GENFIGS) && mkdir(GENFIGS)
 plotlyjs()
 
diff --git a/examples/sphere.jl b/examples/sphere.jl
index 38329493..916975b5 100644
--- a/examples/sphere.jl
+++ b/examples/sphere.jl
@@ -33,7 +33,7 @@ function threshold!(A::AbstractArray, ϵ)
 end
 
 using FastTransforms, LinearAlgebra, Plots
-const GENFIGS = joinpath(dirname(dirname(pathof(FastTransforms))), "docs/src/generated")
+const GENFIGS = joinpath(pkgdir(FastTransforms), "docs/src/generated")
 !isdir(GENFIGS) && mkdir(GENFIGS)
 plotlyjs()
 
diff --git a/examples/triangle.jl b/examples/triangle.jl
index cb4fbbae..29ebb713 100644
--- a/examples/triangle.jl
+++ b/examples/triangle.jl
@@ -24,7 +24,7 @@
 # [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
 
 using FastTransforms, LinearAlgebra, Plots
-const GENFIGS = joinpath(dirname(dirname(pathof(FastTransforms))), "docs/src/generated")
+const GENFIGS = joinpath(pkgdir(FastTransforms), "docs/src/generated")
 !isdir(GENFIGS) && mkdir(GENFIGS)
 plotlyjs()
 

From 29432fd97e2b5208210e9bb10e40b29f29417062 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 21 Mar 2022 16:24:33 +0000
Subject: [PATCH 096/222] CompatHelper: bump compat for "ArrayLayouts" to "0.8"
 (#168)

* CompatHelper: bump compat for "ArrayLayouts" to "0.8"

* v0.13.7

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Sheehan Olver <solver@mac.com>
---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 785af4ed..e63acc0e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.13.6"
+version = "0.13.7"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -20,7 +20,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "1.0"
-ArrayLayouts = "0.4, 0.5, 0.6, 0.7"
+ArrayLayouts = "0.4, 0.5, 0.6, 0.7, 0.8"
 BinaryProvider = "0.5"
 DSP = "0.6, 0.7"
 FFTW = "1"

From 4bbefc9ad453d9405c2d482caaa01cf366279d36 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 4 Apr 2022 10:55:56 -0500
Subject: [PATCH 097/222] only use BinaryProvider if building from source

---
 deps/build.jl | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/deps/build.jl b/deps/build.jl
index 944f1e2e..0d36a61b 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -1,21 +1,20 @@
-using BinaryProvider
-import Libdl
+if get(ENV, "FT_BUILD_FROM_SOURCE", "false") == "true"
+    using BinaryProvider
+    import Libdl
 
-const extension = Sys.isapple() ? "dylib" : Sys.islinux() ? "so" : Sys.iswindows() ? "dll" : ""
+    const extension = Sys.isapple() ? "dylib" : Sys.islinux() ? "so" : Sys.iswindows() ? "dll" : ""
 
-print_error() = error(
-    "FastTransforms could not be properly installed.\n Please check that you have all dependencies installed. " *
-    "Sample installation of dependencies:\n" *
-    print_platform_error(platform_key_abi())
-)
+    print_error() = error(
+        "FastTransforms could not be properly installed.\n Please check that you have all dependencies installed. " *
+        "Sample installation of dependencies:\n" *
+        print_platform_error(platform_key_abi())
+    )
 
-print_platform_error(p::Platform) = "On $(BinaryProvider.platform_name(p)), please consider opening a pull request to add support to build from source.\n"
-print_platform_error(p::MacOS) = "On MacOS\n\tbrew install libomp fftw mpfr\n"
-print_platform_error(p::Linux) = "On Linux\n\tsudo apt-get install libomp-dev libblas-dev libopenblas-base libfftw3-dev libmpfr-dev\n"
-print_platform_error(p::Windows) = "On Windows\n\tvcpkg install openblas:x64-windows fftw3[core,threads]:x64-windows mpir:x64-windows mpfr:x64-windows\n"
+    print_platform_error(p::Platform) = "On $(BinaryProvider.platform_name(p)), please consider opening a pull request to add support to build from source.\n"
+    print_platform_error(p::MacOS) = "On MacOS\n\tbrew install libomp fftw mpfr\n"
+    print_platform_error(p::Linux) = "On Linux\n\tsudo apt-get install libomp-dev libblas-dev libopenblas-base libfftw3-dev libmpfr-dev\n"
+    print_platform_error(p::Windows) = "On Windows\n\tvcpkg install openblas:x64-windows fftw3[core,threads]:x64-windows mpir:x64-windows mpfr:x64-windows\n"
 
-ft_build_from_source = get(ENV, "FT_BUILD_FROM_SOURCE", "false")
-if ft_build_from_source == "true"
     make = Sys.iswindows() ? "mingw32-make" : "make"
     flags = Sys.isapple() ? "FT_USE_APPLEBLAS=1" : Sys.iswindows() ? "FT_FFTW_WITH_COMBINED_THREADS=1" : ""
     script = """

From fb75e4ca3d917e9e2d8f86a6a3d356399f12b579 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 4 Apr 2022 10:56:37 -0500
Subject: [PATCH 098/222] new patch release (v0.13.8)

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index e63acc0e..d920dd6c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.13.7"
+version = "0.13.8"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From ee31e98707915c3f4a8aa86c015cd1917cf5a448 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 4 Apr 2022 11:32:47 -0500
Subject: [PATCH 099/222] remove binaryprovider

---
 Project.toml             |  4 +---
 deps/build.jl            | 26 +++++++++-----------------
 src/libfasttransforms.jl |  3 +--
 3 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/Project.toml b/Project.toml
index d920dd6c..3cb2e52e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,11 +1,10 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.13.8"
+version = "0.13.9"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
 ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
-BinaryProvider = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
 DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
@@ -21,7 +20,6 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 [compat]
 AbstractFFTs = "1.0"
 ArrayLayouts = "0.4, 0.5, 0.6, 0.7, 0.8"
-BinaryProvider = "0.5"
 DSP = "0.6, 0.7"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
diff --git a/deps/build.jl b/deps/build.jl
index 0d36a61b..76b27a4e 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -1,20 +1,5 @@
 if get(ENV, "FT_BUILD_FROM_SOURCE", "false") == "true"
-    using BinaryProvider
-    import Libdl
-
-    const extension = Sys.isapple() ? "dylib" : Sys.islinux() ? "so" : Sys.iswindows() ? "dll" : ""
-
-    print_error() = error(
-        "FastTransforms could not be properly installed.\n Please check that you have all dependencies installed. " *
-        "Sample installation of dependencies:\n" *
-        print_platform_error(platform_key_abi())
-    )
-
-    print_platform_error(p::Platform) = "On $(BinaryProvider.platform_name(p)), please consider opening a pull request to add support to build from source.\n"
-    print_platform_error(p::MacOS) = "On MacOS\n\tbrew install libomp fftw mpfr\n"
-    print_platform_error(p::Linux) = "On Linux\n\tsudo apt-get install libomp-dev libblas-dev libopenblas-base libfftw3-dev libmpfr-dev\n"
-    print_platform_error(p::Windows) = "On Windows\n\tvcpkg install openblas:x64-windows fftw3[core,threads]:x64-windows mpir:x64-windows mpfr:x64-windows\n"
-
+    extension = Sys.isapple() ? "dylib" : Sys.islinux() ? "so" : Sys.iswindows() ? "dll" : ""
     make = Sys.iswindows() ? "mingw32-make" : "make"
     flags = Sys.isapple() ? "FT_USE_APPLEBLAS=1" : Sys.iswindows() ? "FT_FFTW_WITH_COMBINED_THREADS=1" : ""
     script = """
@@ -38,7 +23,14 @@ if get(ENV, "FT_BUILD_FROM_SOURCE", "false") == "true"
     try
         run(`bash -c $(script)`)
     catch
-        print_error()
+        error(
+            "FastTransforms could not be properly installed.\n Please check that you have all dependencies installed. " *
+            "Sample installation of dependencies:\n" *
+            (Sys.isapple() ? "On MacOS\n\tbrew install libomp fftw mpfr\n" :
+             Sys.islinux() ? "On Linux\n\tsudo apt-get install libomp-dev libblas-dev libopenblas-base libfftw3-dev libmpfr-dev\n" :
+             Sys.iswindows() ? "On Windows\n\tvcpkg install openblas:x64-windows fftw3[core,threads]:x64-windows mpir:x64-windows mpfr:x64-windows\n" :
+             "On your platform, please consider opening a pull request to add support to build from source.\n")
+        )
     end
     println("FastTransforms built from source.")
 else
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 0391095c..d70ba556 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -1,5 +1,4 @@
-ft_build_from_source = get(ENV, "FT_BUILD_FROM_SOURCE", "false")
-if ft_build_from_source == "true"
+if get(ENV, "FT_BUILD_FROM_SOURCE", "false") == "true"
     using Libdl
     const libfasttransforms = find_library("libfasttransforms", [joinpath(dirname(@__DIR__), "deps")])
     if libfasttransforms ≡ nothing || length(libfasttransforms) == 0

From cc105282a3c112927dfcc816c1fee1a83c99a1ac Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Mon, 9 May 2022 21:06:49 -0500
Subject: [PATCH 100/222] add modified classical op ccalls (#173)

---
 Project.toml                   |   4 +-
 docs/src/dev.md                |  18 ++---
 src/FastTransforms.jl          |   2 +
 src/libfasttransforms.jl       | 128 +++++++++++++++++++++++++++------
 test/libfasttransformstests.jl |  27 +++++++
 5 files changed, 146 insertions(+), 33 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3cb2e52e..6838bfbf 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.13.9"
+version = "0.14.0"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -23,7 +23,7 @@ ArrayLayouts = "0.4, 0.5, 0.6, 0.7, 0.8"
 DSP = "0.6, 0.7"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
-FastTransforms_jll = "0.5.4"
+FastTransforms_jll = "0.6.0"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
diff --git a/docs/src/dev.md b/docs/src/dev.md
index 20e055e9..75c35290 100644
--- a/docs/src/dev.md
+++ b/docs/src/dev.md
@@ -40,34 +40,34 @@ This lets the developer experiment with new features through `ccall`ing into ble
 To get from a C library release to a Julia package release, the developer needs to update Yggdrasil's [build_tarballs.jl](https://github.com/JuliaPackaging/Yggdrasil/blob/master/F/FastTransforms/build_tarballs.jl) script for the new version and its 256-bit SHA. On macOS, the SHA can be found by:
 
 ```julia
-shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.5.0 --output FastTransforms-0.5.0.tar.gz
+shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.6.0 --output FastTransforms.tar.gz
   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                  Dload  Upload   Total   Spent    Left  Speed
-100  156k  100  156k    0     0   349k      0 --:--:-- --:--:-- --:--:--  348k
+100  162k    0  162k    0     0   252k      0 --:--:-- --:--:-- --:--:--  252k
 
-shell> shasum -a 256 FastTransforms-0.5.0.tar.gz
-9556d0037bd5348a33f15ad6100e32053b6e22cab16a97c504f30d6c52fd0efd  FastTransforms-0.5.0.tar.gz
+shell> shasum -a 256 FastTransforms.tar.gz
+ae2db2fa808ca17c5dc5ac25b079eba2dbe598d061b9b4e14c948680870abc3c  FastTransforms.tar.gz
 
-shell> rm -f FastTransforms-0.5.0.tar.gz
+shell> rm -f FastTransforms.tar.gz
 
 ```
 
 Using [SHA.jl](https://github.com/JuliaCrypto/SHA.jl), the SHA can also be found by:
 
 ```julia
-shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.5.0 --output FastTransforms-0.5.0.tar.gz
+shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.6.0 --output FastTransforms.tar.gz
   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                  Dload  Upload   Total   Spent    Left  Speed
 100  156k    0  156k    0     0   443k      0 --:--:-- --:--:-- --:--:--  443k
 
 julia> using SHA
 
-julia> open("FastTransforms-0.5.0.tar.gz") do f
+julia> open("FastTransforms.tar.gz") do f
            bytes2hex(sha256(f))
        end
-"9556d0037bd5348a33f15ad6100e32053b6e22cab16a97c504f30d6c52fd0efd"
+"ae2db2fa808ca17c5dc5ac25b079eba2dbe598d061b9b4e14c948680870abc3c"
 
-shell> rm -f FastTransforms-0.5.0.tar.gz
+shell> rm -f FastTransforms.tar.gz
 
 ```
 
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 40c7bca1..d8f0a191 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -34,12 +34,14 @@ import LinearAlgebra: mul!, lmul!, ldiv!
 export leg2cheb, cheb2leg, ultra2ultra, jac2jac,
        lag2lag, jac2ultra, ultra2jac, jac2cheb,
        cheb2jac, ultra2cheb, cheb2ultra, associatedjac2jac,
+       modifiedjac2jac, modifiedlag2lag, modifiedherm2herm,
        sph2fourier, sphv2fourier, disk2cxf, rectdisk2cheb, tri2cheb, tet2cheb,
        fourier2sph, fourier2sphv, cxf2disk, cheb2rectdisk, cheb2tri, cheb2tet
 
 export plan_leg2cheb, plan_cheb2leg, plan_ultra2ultra, plan_jac2jac,
        plan_lag2lag, plan_jac2ultra, plan_ultra2jac, plan_jac2cheb,
        plan_cheb2jac, plan_ultra2cheb, plan_cheb2ultra, plan_associatedjac2jac,
+       plan_modifiedjac2jac, plan_modifiedlag2lag, plan_modifiedherm2herm,
        plan_sph2fourier, plan_sph_synthesis, plan_sph_analysis,
        plan_sphv2fourier, plan_sphv_synthesis, plan_sphv_analysis,
        plan_disk2cxf, plan_disk_synthesis, plan_disk_analysis,
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index d70ba556..43b53d6e 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -115,28 +115,31 @@ const CHEB2JAC              = 8
 const ULTRA2CHEB            = 9
 const CHEB2ULTRA           = 10
 const ASSOCIATEDJAC2JAC    = 11
-const SPHERE               = 12
-const SPHEREV              = 13
-const DISK                 = 14
-const RECTDISK             = 15
-const TRIANGLE             = 16
-const TETRAHEDRON          = 17
-const SPINSPHERE           = 18
-const SPHERESYNTHESIS      = 19
-const SPHEREANALYSIS       = 20
-const SPHEREVSYNTHESIS     = 21
-const SPHEREVANALYSIS      = 22
-const DISKSYNTHESIS        = 23
-const DISKANALYSIS         = 24
-const RECTDISKSYNTHESIS    = 25
-const RECTDISKANALYSIS     = 26
-const TRIANGLESYNTHESIS    = 27
-const TRIANGLEANALYSIS     = 28
-const TETRAHEDRONSYNTHESIS = 29
-const TETRAHEDRONANALYSIS  = 30
-const SPINSPHERESYNTHESIS  = 31
-const SPINSPHEREANALYSIS   = 32
-const SPHERICALISOMETRY    = 33
+const MODIFIEDJAC2JAC      = 12
+const MODIFIEDLAG2LAG      = 13
+const MODIFIEDHERM2HERM    = 14
+const SPHERE               = 15
+const SPHEREV              = 16
+const DISK                 = 17
+const RECTDISK             = 18
+const TRIANGLE             = 19
+const TETRAHEDRON          = 20
+const SPINSPHERE           = 21
+const SPHERESYNTHESIS      = 22
+const SPHEREANALYSIS       = 23
+const SPHEREVSYNTHESIS     = 24
+const SPHEREVANALYSIS      = 25
+const DISKSYNTHESIS        = 26
+const DISKANALYSIS         = 27
+const RECTDISKSYNTHESIS    = 28
+const RECTDISKANALYSIS     = 29
+const TRIANGLESYNTHESIS    = 30
+const TRIANGLEANALYSIS     = 31
+const TETRAHEDRONSYNTHESIS = 32
+const TETRAHEDRONANALYSIS  = 33
+const SPINSPHERESYNTHESIS  = 34
+const SPINSPHEREANALYSIS   = 35
+const SPHERICALISOMETRY    = 36
 
 
 let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
@@ -151,6 +154,9 @@ let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                ULTRA2CHEB           => "ultraspherical--Chebyshev",
                CHEB2ULTRA           => "Chebyshev--ultraspherical",
                ASSOCIATEDJAC2JAC    => "Associated Jacobi--Jacobi",
+               MODIFIEDJAC2JAC      => "Modified Jacobi--Jacobi",
+               MODIFIEDLAG2LAG      => "Modified Laguerre--Laguerre",
+               MODIFIEDHERM2HERM    => "Modified Hermite--Hermite",
                SPHERE               => "Spherical harmonic--Fourier",
                SPHEREV              => "Spherical vector field--Fourier",
                DISK                 => "Zernike--Chebyshev×Fourier",
@@ -266,6 +272,9 @@ destroy_plan(p::FTPlan{Float64, 1}) = ccall((:ft_destroy_tb_eigen_FMM, libfasttr
 destroy_plan(p::FTPlan{BigFloat, 1}) = ccall((:ft_mpfr_destroy_plan, libfasttransforms), Cvoid, (Ptr{mpfr_t}, Cint), p, p.n)
 destroy_plan(p::FTPlan{Float32, 1, ASSOCIATEDJAC2JAC}) = ccall((:ft_destroy_btb_eigen_FMMf, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 1, ASSOCIATEDJAC2JAC}) = ccall((:ft_destroy_btb_eigen_FMM, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float64, 1, MODIFIEDJAC2JAC}) = ccall((:ft_destroy_modified_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float64, 1, MODIFIEDLAG2LAG}) = ccall((:ft_destroy_modified_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float64, 1, MODIFIEDHERM2HERM}) = ccall((:ft_destroy_modified_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64}) = ccall((:ft_destroy_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHERE}) = ccall((:ft_destroy_spin_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, SPHERESYNTHESIS}) = ccall((:ft_destroy_sphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
@@ -383,6 +392,7 @@ end
 for f in (:leg2cheb, :cheb2leg, :ultra2ultra, :jac2jac,
           :lag2lag, :jac2ultra, :ultra2jac, :jac2cheb,
           :cheb2jac, :ultra2cheb, :cheb2ultra, :associatedjac2jac,
+          :modifiedjac2jac, :modifiedlag2lag, :modifiedherm2herm,
           :sph2fourier, :sphv2fourier, :disk2cxf,
           :rectdisk2cheb, :tri2cheb, :tet2cheb)
     plan_f = Symbol("plan_", f)
@@ -526,6 +536,36 @@ function plan_associatedjac2jac(::Type{Float64}, n::Integer, c::Integer, α, β,
     return FTPlan{Float64, 1, ASSOCIATEDJAC2JAC}(plan, n)
 end
 
+function plan_modifiedjac2jac(::Type{Float64}, n::Integer, α, β, w::Vector{Float64}; verbose::Bool=false)
+    #plan_modifiedjac2jac(Float64, n, α, β, w, Vector{Float64}(undef, 0); verbose=verbose)
+    plan = ccall((:ft_plan_modified_jacobi_to_jacobi, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Float64, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, α, β, length(w), w, 0, C_NULL, verbose)
+    return FTPlan{Float64, 1, MODIFIEDJAC2JAC}(plan, n)
+end
+
+function plan_modifiedjac2jac(::Type{Float64}, n::Integer, α, β, u::Vector{Float64}, v::Vector{Float64}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_jacobi_to_jacobi, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Float64, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, α, β, length(u), u, length(v), v, verbose)
+    return FTPlan{Float64, 1, MODIFIEDJAC2JAC}(plan, n)
+end
+
+function plan_modifiedlag2lag(::Type{Float64}, n::Integer, α, w::Vector{Float64}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_laguerre_to_laguerre, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, α, length(w), w, 0, C_NULL, verbose)
+    return FTPlan{Float64, 1, MODIFIEDLAG2LAG}(plan, n)
+end
+
+function plan_modifiedlag2lag(::Type{Float64}, n::Integer, α, u::Vector{Float64}, v::Vector{Float64}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_laguerre_to_laguerre, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, α, length(u), u, length(v), v, verbose)
+    return FTPlan{Float64, 1, MODIFIEDLAG2LAG}(plan, n)
+end
+
+function plan_modifiedherm2herm(::Type{Float64}, n::Integer, w::Vector{Float64}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_hermite_to_hermite, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, length(w), w, 0, C_NULL, verbose)
+    return FTPlan{Float64, 1, MODIFIEDHERM2HERM}(plan, n)
+end
+
+function plan_modifiedherm2herm(::Type{Float64}, n::Integer, u::Vector{Float64}, v::Vector{Float64}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_hermite_to_hermite, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, length(u), u, length(v), v, verbose)
+    return FTPlan{Float64, 1, MODIFIEDHERM2HERM}(plan, n)
+end
 
 function plan_leg2cheb(::Type{BigFloat}, n::Integer; normleg::Bool=false, normcheb::Bool=false)
     plan = ccall((:ft_mpfr_plan_legendre_to_chebyshev, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint, Clong, Int32), normleg, normcheb, n, precision(BigFloat), Base.MPFR.ROUNDING_MODE[])
@@ -789,6 +829,28 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bbbfmvf, :Float32),
     end
 end
 
+for (fJ, fC, elty) in ((:lmul!, :ft_mpmv, :Float64),
+                       (:ldiv!, :ft_mpsv, :Float64))
+    @eval begin
+        ModifiedFTPlan = Union{FTPlan{$elty, 1, MODIFIEDJAC2JAC}, FTPlan{$elty, 1, MODIFIEDLAG2LAG}, FTPlan{$elty, 1, MODIFIEDHERM2HERM}}
+        function $fJ(p::ModifiedFTPlan, x::Vector{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'N', p, x)
+            return x
+        end
+        function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan}, x::Vector{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
+            return x
+        end
+        function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan}, x::Vector{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
+            return x
+        end
+    end
+end
+
 for (fJ, fC) in ((:lmul!, :ft_mpfr_trmv_ptr),
                  (:ldiv!, :ft_mpfr_trsv_ptr))
     @eval begin
@@ -854,6 +916,28 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bbbfmmf, :Float32),
     end
 end
 
+for (fJ, fC, elty) in ((:lmul!, :ft_mpmm, :Float64),
+                       (:ldiv!, :ft_mpsm, :Float64))
+    @eval begin
+        ModifiedFTPlan = Union{FTPlan{$elty, 1, MODIFIEDJAC2JAC}, FTPlan{$elty, 1, MODIFIEDLAG2LAG}, FTPlan{$elty, 1, MODIFIEDHERM2HERM}}
+        function $fJ(p::ModifiedFTPlan, x::Matrix{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan}, x::Matrix{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan}, x::Matrix{$elty})
+            checksize(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+    end
+end
+
 for (fJ, fC) in ((:lmul!, :ft_mpfr_trmm_ptr),
                  (:ldiv!, :ft_mpfr_trsm_ptr))
     @eval begin
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 818938fa..8d57f013 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -85,6 +85,33 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         @test V\y ≈ x
     end
 
+    @testset "Modified classical orthonormal polynomial transforms" begin
+        (n, α, β) = (16, 0, 0)
+        P1 = plan_modifiedjac2jac(Float64, n, α, β, [0.9428090415820636, -0.32659863237109055, -0.42163702135578396, 0.2138089935299396]) # u1(x) = (1-x)^2*(1+x)
+        P2 = plan_modifiedjac2jac(Float64, n, α, β, [0.9428090415820636, -0.32659863237109055, -0.42163702135578396, 0.2138089935299396], [1.4142135623730951]) # u2(x) = (1-x)^2*(1+x)
+        P3 = plan_modifiedjac2jac(Float64, n, α, β, [-0.9428090415820636, 0.32659863237109055, 0.42163702135578396, -0.2138089935299396], [-5.185449728701348, 0.0, 0.42163702135578374]) # u3(x) = -(1-x)^2*(1+x), v3(x) = -(2-x)*(2+x)
+        P4 = plan_modifiedjac2jac(Float64, n, α+2, β+1, [1.1547005383792517], [4.387862045841156, 0.1319657758147716, -0.20865621238292037]) # v4(x) = (2-x)*(2+x)
+
+        @test P1*I ≈ P2*I
+        @test P1\I ≈ P2\I
+        @test P3*I ≈ P2*(P4*I)
+        @test P3\I ≈ P4\(P2\I)
+
+        P5 = plan_modifiedlag2lag(Float64, n, α, [2.0, -4.0, 2.0]) # u5(x) = x^2
+        P6 = plan_modifiedlag2lag(Float64, n, α, [2.0, -4.0, 2.0], [1.0]) # u6(x) = x^2
+        P7 = plan_modifiedlag2lag(Float64, n, α, [2.0, -4.0, 2.0], [7.0, -7.0, 2.0]) # u7(x) = x^2, v7(x) = (1+x)*(2+x)
+        P8 = plan_modifiedlag2lag(Float64, n, α+2, [sqrt(2.0)], [sqrt(1058.0), -sqrt(726.0), sqrt(48.0)]) # v8(x) = (1+x)*(2+x)
+
+        @test P5*I ≈ P6*I
+        @test P5\I ≈ P6\I
+        @test P7*I ≈ P6*(P8*I)
+        @test P7\I ≈ P8\(P6\I)
+
+        P9 = plan_modifiedherm2herm(Float64, n, [2.995504568550877, 0.0, 3.7655850551068593, 0.0, 1.6305461589167827], [2.995504568550877, 0.0, 3.7655850551068593, 0.0, 1.6305461589167827]) # u9(x) = 1+x^2+x^4, v9(x) = 1+x^2+x^4
+
+        @test P9*I ≈ P9\I
+    end
+
     function test_nd_plans(p, ps, pa, A)
         B = copy(A)
         C = ps*(p*A)

From 7d5a39ebd9cc58e137e2efdd7a707b85596d8b92 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 6 Jul 2022 11:46:30 -0500
Subject: [PATCH 101/222] add triangular typing for * and \ of 1D FTPlans,
 elliptic submodule

---
 Project.toml             |   2 +-
 docs/src/index.md        |   6 ++
 src/FastTransforms.jl    |   1 +
 src/elliptic.jl          | 121 +++++++++++++++++++++++++++++++++++++++
 src/libfasttransforms.jl |  23 ++++++--
 5 files changed, 146 insertions(+), 7 deletions(-)
 create mode 100644 src/elliptic.jl

diff --git a/Project.toml b/Project.toml
index 6838bfbf..fbafe585 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.0"
+version = "0.14.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/docs/src/index.md b/docs/src/index.md
index 2b2cfe51..39fc3a4b 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -135,3 +135,9 @@ FastTransforms.chebyshevjacobimoments2
 ```@docs
 FastTransforms.chebyshevlogmoments2
 ```
+
+### Elliptic
+
+```@docs
+FastTransforms.Elliptic
+```
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index d8f0a191..bdbf670f 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -53,6 +53,7 @@ export plan_leg2cheb, plan_cheb2leg, plan_ultra2ultra, plan_jac2jac,
 include("clenshaw.jl")
 
 include("libfasttransforms.jl")
+include("elliptic.jl")
 
 export nufft, nufft1, nufft2, nufft3, inufft1, inufft2
 
diff --git a/src/elliptic.jl b/src/elliptic.jl
new file mode 100644
index 00000000..ccf52f10
--- /dev/null
+++ b/src/elliptic.jl
@@ -0,0 +1,121 @@
+"""
+`FastTransforms` submodule for the computation of some elliptic integrals and functions.
+
+Complete elliptic integrals of the first and second kinds:
+```math
+K(k) = \\int_0^{\\frac{\\pi}{2}} \\frac{{\\rm d}\\theta}{\\sqrt{1-k^2\\sin^2\\theta}},\\quad{\\rm and},
+```
+```math
+E(k) = \\int_0^{\\frac{\\pi}{2}} \\sqrt{1-k^2\\sin^2\\theta} {\\rm\\,d}\\theta.
+```
+
+Jacobian elliptic functions:
+```math
+x = \\int_0^{\\operatorname{sn}(x,k)} \\frac{{\\rm d}t}{\\sqrt{(1-t^2)(1-k^2t^2)}},
+```
+```math
+x = \\int_{\\operatorname{cn}(x,k)}^1 \\frac{{\\rm d}t}{\\sqrt{(1-t^2)[1-k^2(1-t^2)]}},
+```
+```math
+x = \\int_{\\operatorname{dn}(x,k)}^1 \\frac{{\\rm d}t}{\\sqrt{(1-t^2)(t^2-1+k^2)}},
+```
+and the remaining nine are defined by:
+```math
+\\operatorname{pq}(x,k) = \\frac{\\operatorname{pr}(x,k)}{\\operatorname{qr}(x,k)} = \\frac{1}{\\operatorname{qp}(x,k)}.
+```
+"""
+module Elliptic
+
+import FastTransforms: libfasttransforms
+
+export K, E,
+       sn, cn, dn, ns, nc, nd,
+       sc, cs, sd, ds, cd, dc
+
+for (fC, elty) in ((:ft_complete_elliptic_integralf, :Float32), (:ft_complete_elliptic_integral, :Float64)) # one C entry point per precision
+    @eval begin
+        function K(k::$elty) # complete elliptic integral of the first kind (see the module docstring)
+            return ccall(($(string(fC)), libfasttransforms), $elty, (Cint, $elty), '1', k) # the Char '1' is converted to a Cint selector
+        end
+        function E(k::$elty) # complete elliptic integral of the second kind (see the module docstring)
+            return ccall(($(string(fC)), libfasttransforms), $elty, (Cint, $elty), '2', k) # same C routine, selector '2'
+        end
+    end
+end
+
+const SN = UInt(1) # flag bit passed together with the sn output pointer
+const CN = UInt(2) # flag bit passed together with the cn output pointer
+const DN = UInt(4) # flag bit passed together with the dn output pointer
+
+for (fC, elty) in ((:ft_jacobian_elliptic_functionsf, :Float32), (:ft_jacobian_elliptic_functions, :Float64))
+    @eval begin # NOTE(review): flags are combined with | on the assumption that the C routine reads them as a bitmask - confirm
+        function sn(x::$elty, k::$elty)
+            retsn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, retsn, C_NULL, C_NULL, SN)
+            retsn[]
+        end
+        function cn(x::$elty, k::$elty)
+            retcn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, C_NULL, retcn, C_NULL, CN)
+            retcn[]
+        end
+        function dn(x::$elty, k::$elty)
+            retdn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, C_NULL, C_NULL, retdn, DN)
+            retdn[]
+        end
+        function ns(x::$elty, k::$elty) # 1/sn
+            retsn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, retsn, C_NULL, C_NULL, SN)
+            inv(retsn[])
+        end
+        function nc(x::$elty, k::$elty) # 1/cn
+            retcn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, C_NULL, retcn, C_NULL, CN)
+            inv(retcn[])
+        end
+        function nd(x::$elty, k::$elty) # 1/dn
+            retdn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, C_NULL, C_NULL, retdn, DN)
+            inv(retdn[])
+        end
+        function sc(x::$elty, k::$elty) # sn/cn; SN | CN sets both bits (SN & CN would be 0)
+            retsn = Ref{$elty}()
+            retcn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, retsn, retcn, C_NULL, SN | CN)
+            retsn[]/retcn[]
+        end
+        function cs(x::$elty, k::$elty) # cn/sn
+            retsn = Ref{$elty}()
+            retcn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, retsn, retcn, C_NULL, SN | CN)
+            retcn[]/retsn[]
+        end
+        function sd(x::$elty, k::$elty) # sn/dn
+            retsn = Ref{$elty}()
+            retdn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, retsn, C_NULL, retdn, SN | DN)
+            retsn[]/retdn[]
+        end
+        function ds(x::$elty, k::$elty) # dn/sn
+            retsn = Ref{$elty}()
+            retdn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, retsn, C_NULL, retdn, SN | DN)
+            retdn[]/retsn[]
+        end
+        function cd(x::$elty, k::$elty) # cn/dn
+            retcn = Ref{$elty}()
+            retdn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, C_NULL, retcn, retdn, CN | DN)
+            retcn[]/retdn[]
+        end
+        function dc(x::$elty, k::$elty) # dn/cn
+            retcn = Ref{$elty}()
+            retdn = Ref{$elty}()
+            ccall(($(string(fC)), libfasttransforms), Cvoid, ($elty, $elty, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, UInt), x, k, C_NULL, retcn, retdn, CN | DN)
+            retdn[]/retcn[]
+        end
+    end
+end
+
+end # module
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 43b53d6e..8548b2e5 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -567,6 +567,7 @@ function plan_modifiedherm2herm(::Type{Float64}, n::Integer, u::Vector{Float64},
     return FTPlan{Float64, 1, MODIFIEDHERM2HERM}(plan, n)
 end
 
+
 function plan_leg2cheb(::Type{BigFloat}, n::Integer; normleg::Bool=false, normcheb::Bool=false)
     plan = ccall((:ft_mpfr_plan_legendre_to_chebyshev, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint, Clong, Int32), normleg, normcheb, n, precision(BigFloat), Base.MPFR.ROUNDING_MODE[])
     return FTPlan{BigFloat, 1, LEG2CHEB}(plan, n)
@@ -778,12 +779,22 @@ end
 \(p::AdjointFTPlan{T}, x::AbstractArray{T}) where T = ldiv!(p, Array(x))
 \(p::TransposeFTPlan{T}, x::AbstractArray{T}) where T = ldiv!(p, Array(x))
 
-*(p::FTPlan{T, 1}, x::UniformScaling{S}) where {T, S} = lmul!(p, Matrix{promote_type(T, S)}(x, p.n, p.n))
-*(p::AdjointFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = lmul!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n))
-*(p::TransposeFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = lmul!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n))
-\(p::FTPlan{T, 1}, x::UniformScaling{S}) where {T, S} = ldiv!(p, Matrix{promote_type(T, S)}(x, p.n, p.n))
-\(p::AdjointFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = ldiv!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n))
-\(p::TransposeFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = ldiv!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n))
+*(p::FTPlan{T, 1}, x::UniformScaling{S}) where {T, S} = UpperTriangular(lmul!(p, Matrix{promote_type(T, S)}(x, p.n, p.n)))
+*(p::AdjointFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = LowerTriangular(lmul!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n)))
+*(p::TransposeFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = LowerTriangular(lmul!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n)))
+\(p::FTPlan{T, 1}, x::UniformScaling{S}) where {T, S} = UpperTriangular(ldiv!(p, Matrix{promote_type(T, S)}(x, p.n, p.n)))
+\(p::AdjointFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = LowerTriangular(ldiv!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n)))
+\(p::TransposeFTPlan{T, FTPlan{T, 1, K}}, x::UniformScaling{S}) where {T, S, K} = LowerTriangular(ldiv!(p, Matrix{promote_type(T, S)}(x, p.parent.n, p.parent.n)))
+
+const AbstractUpperTriangular{T, S <: AbstractMatrix} = Union{UpperTriangular{T, S}, UnitUpperTriangular{T, S}}
+const AbstractLowerTriangular{T, S <: AbstractMatrix} = Union{LowerTriangular{T, S}, UnitLowerTriangular{T, S}}
+# Triangular inputs keep their structure; the adjoint/transpose wrappers are parameterized by the parent 1D plan type.
+*(p::FTPlan{T, 1}, x::AbstractUpperTriangular) where T = UpperTriangular(lmul!(p, Array(x)))
+*(p::AdjointFTPlan{T, FTPlan{T, 1, K}}, x::AbstractLowerTriangular) where {T, K} = LowerTriangular(lmul!(p, Array(x)))
+*(p::TransposeFTPlan{T, FTPlan{T, 1, K}}, x::AbstractLowerTriangular) where {T, K} = LowerTriangular(lmul!(p, Array(x)))
+\(p::FTPlan{T, 1}, x::AbstractUpperTriangular) where T = UpperTriangular(ldiv!(p, Array(x)))
+\(p::AdjointFTPlan{T, FTPlan{T, 1, K}}, x::AbstractLowerTriangular) where {T, K} = LowerTriangular(ldiv!(p, Array(x)))
+\(p::TransposeFTPlan{T, FTPlan{T, 1, K}}, x::AbstractLowerTriangular) where {T, K} = LowerTriangular(ldiv!(p, Array(x)))
 
 for (fJ, fC, elty) in ((:lmul!, :ft_bfmvf, :Float32),
                        (:ldiv!, :ft_bfsvf, :Float32),

From 3bd5a9a2cf744fc26418fe999bbb151b5ccc6634 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 6 Jul 2022 12:08:26 -0500
Subject: [PATCH 102/222] fix docs example

---
 examples/subspaceangles.jl |  2 +-
 src/libfasttransforms.jl   | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/examples/subspaceangles.jl b/examples/subspaceangles.jl
index d3c19a94..b6bf5ef0 100644
--- a/examples/subspaceangles.jl
+++ b/examples/subspaceangles.jl
@@ -22,7 +22,7 @@ k, N = 1, 11
 P = plan_lag2lag(Float64, N, α, β; norm2=true)
 
 # We apply the plan to the identity, followed by the adjoint plan:
-VtV = P*I
+VtV = parent(P*I)
 lmul!(P', VtV)
 
 # From this matrix, the angles are recovered from:
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 8548b2e5..caa514a2 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -389,6 +389,8 @@ function unsafe_convert(::Type{Ptr{mpfr_t}}, p::TransposeFTPlan)
     end
 end
 
+const ModifiedFTPlan{T} = Union{FTPlan{T, 1, MODIFIEDJAC2JAC}, FTPlan{T, 1, MODIFIEDLAG2LAG}, FTPlan{T, 1, MODIFIEDHERM2HERM}}
+
 for f in (:leg2cheb, :cheb2leg, :ultra2ultra, :jac2jac,
           :lag2lag, :jac2ultra, :ultra2jac, :jac2cheb,
           :cheb2jac, :ultra2cheb, :cheb2ultra, :associatedjac2jac,
@@ -843,18 +845,17 @@ end
 for (fJ, fC, elty) in ((:lmul!, :ft_mpmv, :Float64),
                        (:ldiv!, :ft_mpsv, :Float64))
     @eval begin
-        ModifiedFTPlan = Union{FTPlan{$elty, 1, MODIFIEDJAC2JAC}, FTPlan{$elty, 1, MODIFIEDLAG2LAG}, FTPlan{$elty, 1, MODIFIEDHERM2HERM}}
-        function $fJ(p::ModifiedFTPlan, x::Vector{$elty})
+        function $fJ(p::ModifiedFTPlan{$elty}, x::Vector{$elty})
             checksize(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'N', p, x)
             return x
         end
-        function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan}, x::Vector{$elty})
+        function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan{$elty}}, x::Vector{$elty})
             checksize(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
             return x
         end
-        function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan}, x::Vector{$elty})
+        function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan{$elty}}, x::Vector{$elty})
             checksize(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
             return x
@@ -930,18 +931,17 @@ end
 for (fJ, fC, elty) in ((:lmul!, :ft_mpmm, :Float64),
                        (:ldiv!, :ft_mpsm, :Float64))
     @eval begin
-        ModifiedFTPlan = Union{FTPlan{$elty, 1, MODIFIEDJAC2JAC}, FTPlan{$elty, 1, MODIFIEDLAG2LAG}, FTPlan{$elty, 1, MODIFIEDHERM2HERM}}
-        function $fJ(p::ModifiedFTPlan, x::Matrix{$elty})
+        function $fJ(p::ModifiedFTPlan{$elty}, x::Matrix{$elty})
             checksize(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
             return x
         end
-        function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan}, x::Matrix{$elty})
+        function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan{$elty}}, x::Matrix{$elty})
             checksize(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
             return x
         end
-        function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan}, x::Matrix{$elty})
+        function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan{$elty}}, x::Matrix{$elty})
             checksize(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
             return x

From ae31986b994dbd7057a899e41e815e95b090430c Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 20 Jul 2022 13:36:41 -0600
Subject: [PATCH 103/222] remove ArrayLayouts (#177)

Cuts the package load time by roughly one third to one half.

The old calling sequences were probably unsafe too: one can conceive of an AbstractVector with AbstractColumnMajor layout for which the pointer we pass does not point to the data.
---
 Project.toml             |  2 --
 src/FastTransforms.jl    |  2 +-
 src/clenshaw.jl          |  6 ++----
 src/libfasttransforms.jl | 24 ++++++++++++------------
 4 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/Project.toml b/Project.toml
index fbafe585..8ed12314 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,7 +4,6 @@ version = "0.14.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
-ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
 DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
@@ -19,7 +18,6 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "1.0"
-ArrayLayouts = "0.4, 0.5, 0.6, 0.7, 0.8"
 DSP = "0.6, 0.7"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index bdbf670f..40d7dc16 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,6 +1,6 @@
 module FastTransforms
 
-using ArrayLayouts, FastGaussQuadrature, FillArrays, LinearAlgebra,
+using FastGaussQuadrature, FillArrays, LinearAlgebra,
       Reexport, SpecialFunctions, ToeplitzMatrices
 
 import DSP
diff --git a/src/clenshaw.jl b/src/clenshaw.jl
index e57d8952..a3e32452 100644
--- a/src/clenshaw.jl
+++ b/src/clenshaw.jl
@@ -164,9 +164,7 @@ clenshaw!(c::AbstractVector, x::AbstractVector) = clenshaw!(c, x, x)
 evaluates the first-kind Chebyshev (T) expansion with coefficients `c` at points `x`,
 overwriting `f` with the results.
 """
-clenshaw!(c::AbstractVector, x::AbstractVector, f::AbstractVector) = _clenshaw!(MemoryLayout(c), MemoryLayout(x), MemoryLayout(f), c, x, f)
-
-function _clenshaw!(_, _, _, c::AbstractVector, x::AbstractVector, f::AbstractVector)
+function clenshaw!(c::AbstractVector, x::AbstractVector, f::AbstractVector)
     f .= clenshaw.(Ref(c), x)
 end
 
@@ -212,4 +210,4 @@ end
 
 clenshaw(c::AbstractVector, x::AbstractVector) = clenshaw!(c, copy(x))
 clenshaw(c::AbstractMatrix, x::Number) = clenshaw!(c, x, Vector{promote_type(eltype(c),typeof(x))}(undef, size(c,2)))
-clenshaw(c::AbstractMatrix, x::AbstractVector) = clenshaw!(c, x, Matrix{promote_type(eltype(c),eltype(x))}(undef, size(x,1), size(c,2)))
\ No newline at end of file
+clenshaw(c::AbstractMatrix, x::AbstractVector) = clenshaw!(c, x, Matrix{promote_type(eltype(c),eltype(x))}(undef, size(x,1), size(c,2)))
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index caa514a2..408c1314 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -49,15 +49,15 @@ function renew!(x::Array{BigFloat})
     return x
 end
 
-function horner!(c::Vector{Float64}, x::Vector{Float64}, f::Vector{Float64})
+function horner!(c::StridedVector{Float64}, x::Vector{Float64}, f::Vector{Float64})
     @assert length(x) == length(f)
-    ccall((:ft_horner, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, 1, length(x), x, f)
+    ccall((:ft_horner, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, stride(c, 1), length(x), x, f)
     f
 end
 
-function horner!(c::Vector{Float32}, x::Vector{Float32}, f::Vector{Float32})
+function horner!(c::StridedVector{Float32}, x::Vector{Float32}, f::Vector{Float32})
     @assert length(x) == length(f)
-    ccall((:ft_hornerf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, 1, length(x), x, f)
+    ccall((:ft_hornerf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, stride(c, 1), length(x), x, f)
     f
 end
 
@@ -75,31 +75,31 @@ function check_clenshaw_points(x, f)
     length(x) == length(f) || throw(ArgumentError("Dimensions must match"))
 end
 
-function _clenshaw!(::AbstractStridedLayout, ::AbstractColumnMajor, ::AbstractColumnMajor, c::AbstractVector{Float64}, x::AbstractVector{Float64}, f::AbstractVector{Float64})
+function clenshaw!(c::StridedVector{Float64}, x::Vector{Float64}, f::Vector{Float64})
     @boundscheck check_clenshaw_points(x, f)
-    ccall((:ft_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, stride(c,1), length(x), x, f)
+    ccall((:ft_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, stride(c, 1), length(x), x, f)
     f
 end
 
-function _clenshaw!(::AbstractStridedLayout, ::AbstractColumnMajor, ::AbstractColumnMajor, c::AbstractVector{Float32}, x::AbstractVector{Float32}, f::AbstractVector{Float32})
+function clenshaw!(c::StridedVector{Float32}, x::Vector{Float32}, f::Vector{Float32})
     @boundscheck check_clenshaw_points(x, f)
-    ccall((:ft_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, stride(c,1), length(x), x, f)
+    ccall((:ft_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, stride(c, 1), length(x), x, f)
     f
 end
 
-function clenshaw!(c::Vector{Float64}, A::Vector{Float64}, B::Vector{Float64}, C::Vector{Float64}, x::Vector{Float64}, ϕ₀::Vector{Float64}, f::Vector{Float64})
+function clenshaw!(c::StridedVector{Float64}, A::Vector{Float64}, B::Vector{Float64}, C::Vector{Float64}, x::Vector{Float64}, ϕ₀::Vector{Float64}, f::Vector{Float64})
     N = length(c)
     @boundscheck check_clenshaw_recurrences(N, A, B, C)
     @boundscheck check_clenshaw_points(x, ϕ₀, f)
-    ccall((:ft_orthogonal_polynomial_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Cint, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}), N, c, 1, A, B, C, length(x), x, ϕ₀, f)
+    ccall((:ft_orthogonal_polynomial_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Cint, Ptr{Float64}, Ptr{Float64}, Ptr{Float64}), N, c, stride(c, 1), A, B, C, length(x), x, ϕ₀, f)
     f
 end
 
-function clenshaw!(c::Vector{Float32}, A::Vector{Float32}, B::Vector{Float32}, C::Vector{Float32}, x::Vector{Float32}, ϕ₀::Vector{Float32}, f::Vector{Float32})
+function clenshaw!(c::StridedVector{Float32}, A::Vector{Float32}, B::Vector{Float32}, C::Vector{Float32}, x::Vector{Float32}, ϕ₀::Vector{Float32}, f::Vector{Float32})
     N = length(c)
     @boundscheck check_clenshaw_recurrences(N, A, B, C)
     @boundscheck check_clenshaw_points(x, ϕ₀, f)
-    ccall((:ft_orthogonal_polynomial_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cint, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}), N, c, 1, A, B, C, length(x), x, ϕ₀, f)
+    ccall((:ft_orthogonal_polynomial_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}, Cint, Ptr{Float32}, Ptr{Float32}, Ptr{Float32}), N, c, stride(c, 1), A, B, C, length(x), x, ϕ₀, f)
     f
 end
 

From 8a82d79f7065fd1ae7363384248e6570216a93b2 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Thu, 21 Jul 2022 15:31:02 -0600
Subject: [PATCH 104/222] improve inuffts by factorizing Toeplitz before
 entering into cg

---
 src/inufft.jl | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/inufft.jl b/src/inufft.jl
index 6701412c..a843c59a 100644
--- a/src/inufft.jl
+++ b/src/inufft.jl
@@ -3,9 +3,9 @@ Pre-computes an inverse nonuniform fast Fourier transform of type `N`.
 
 For best performance, choose the right number of threads by `FFTW.set_num_threads(4)`, for example.
 """
-struct iNUFFTPlan{N,T,S,PT} <: Plan{T}
+struct iNUFFTPlan{N,T,S,PT,TF} <: Plan{T}
     pt::PT
-    TP::Toeplitz{T}
+    TP::TF
     r::Vector{T}
     p::Vector{T}
     Ap::Vector{T}
@@ -24,12 +24,12 @@ function plan_inufft1(ω::AbstractVector{T}, ϵ::T) where T<:AbstractFloat
     avg = (r[1]+c[1])/2
     r[1] = avg
     c[1] = avg
-    TP = Toeplitz(c, r)
+    TP = factorize(Toeplitz(c, r))
     r = zero(c)
     p = zero(c)
     Ap = zero(c)
 
-    iNUFFTPlan{1, eltype(TP), typeof(ϵ), typeof(pt)}(pt, TP, r, p, Ap, ϵ)
+    iNUFFTPlan{1, eltype(TP), typeof(ϵ), typeof(pt), typeof(TP)}(pt, TP, r, p, Ap, ϵ)
 end
 
 """
@@ -43,12 +43,12 @@ function plan_inufft2(x::AbstractVector{T}, ϵ::T) where T<:AbstractFloat
     avg = (r[1]+c[1])/2
     r[1] = avg
     c[1] = avg
-    TP = Toeplitz(c, r)
+    TP = factorize(Toeplitz(c, r))
     r = zero(c)
     p = zero(c)
     Ap = zero(c)
 
-    iNUFFTPlan{2, eltype(TP), typeof(ϵ), typeof(pt)}(pt, TP, r, p, Ap, ϵ)
+    iNUFFTPlan{2, eltype(TP), typeof(ϵ), typeof(pt), typeof(TP)}(pt, TP, r, p, Ap, ϵ)
 end
 
 
@@ -80,10 +80,8 @@ Computes an inverse nonuniform fast Fourier transform of type II.
 """
 inufft2(c::AbstractVector, x::AbstractVector{T}, ϵ::T) where {T<:AbstractFloat} = plan_inufft2(x, ϵ)*c
 
-function cg_for_inufft(A::ToeplitzMatrices.AbstractToeplitz{T}, x::AbstractVector{T}, b::AbstractVector{T}, r::AbstractVector{T}, p::AbstractVector{T}, Ap::AbstractVector{T}, max_it::Integer, tol::Real) where T
+function cg_for_inufft(A::ToeplitzMatrices.ToeplitzFactorization{T}, x::AbstractVector{T}, b::AbstractVector{T}, r::AbstractVector{T}, p::AbstractVector{T}, Ap::AbstractVector{T}, max_it::Integer, tol::Real) where T
 	n = length(b)
-	n1, n2 = size(A)
-	n == n1 == n2 || throw(DimensionMismatch(""))
     nrmb = norm(b)
     if nrmb == 0 nrmb = one(typeof(nrmb)) end
 	copyto!(x, b)

From cb622baabe75f6d47f81fa7f9ad68000256b90d2 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 3 Aug 2022 12:59:53 -0600
Subject: [PATCH 105/222] allow views in *, \ and contiguous lmul! and ldiv!
 (#182)

finish https://github.com/JuliaApproximation/FastTransforms.jl/commit/3ba4ee3b717598a7ed89fbe675e010c01c415bb9
---
 examples/nonlocaldiffusion.jl |   2 +-
 src/libfasttransforms.jl      | 116 ++++++++++++++++++++++++----------
 2 files changed, 83 insertions(+), 35 deletions(-)

diff --git a/examples/nonlocaldiffusion.jl b/examples/nonlocaldiffusion.jl
index 9598e5c2..0399a110 100644
--- a/examples/nonlocaldiffusion.jl
+++ b/examples/nonlocaldiffusion.jl
@@ -70,7 +70,7 @@ function evaluate_lambda(n::Integer, alpha::T, delta::T) where T
 
     p = plan_jac2jac(T, n-1, zero(T), zero(T), alpha, zero(T))
 
-    lambda[2:end] .= p'lambda[2:end]
+    lmul!(p', view(lambda, 2:n))
 
     for i = 2:n-1
         lambda[i+1] = ((2i-1)*lambda[i+1] + (i-1)*lambda[i])/i
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 408c1314..f7a44189 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -33,8 +33,8 @@ struct mpfr_t <: AbstractFloat
 end
 
 """
-`BigFloat` is a mutable struct and there is no guarantee that each entry in
-an `Array{BigFloat}` has unique pointers. For example, looking at the `Limb`s,
+`BigFloat` is a mutable struct and there is no guarantee that each entry in an
+`AbstractArray{BigFloat}` is unique. For example, looking at the `Limb`s,
 
     Id = Matrix{BigFloat}(I, 3, 3)
     map(x->x.d, Id)
@@ -42,7 +42,7 @@ an `Array{BigFloat}` has unique pointers. For example, looking at the `Limb`s,
 shows that the ones and the zeros all share the same pointers. If a C function
 assumes unicity of each datum, then the array must be renewed with a `deepcopy`.
 """
-function renew!(x::Array{BigFloat})
+function renew!(x::AbstractArray{BigFloat})
     for i in eachindex(x)
         @inbounds x[i] = deepcopy(x[i])
     end
@@ -221,12 +221,20 @@ show(io::IO, p::FTPlan{T, 2, K}) where {T, K} = print(io, "FastTransforms plan f
 show(io::IO, p::FTPlan{T, 3, K}) where {T, K} = print(io, "FastTransforms plan for ", kind2string(K), " for $(p.n)×$(p.l)×$(p.m)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, SPHERICALISOMETRY}) where T = print(io, "FastTransforms ", kind2string(SPHERICALISOMETRY), " plan for $(p.n)×$(2p.n-1)-element array of ", T)
 
-function checksize(p::FTPlan{T, 1}, x::Array{T}) where T
+function checksize(p::FTPlan{T, 1}, x::StridedArray{T}) where T
     if p.n != size(x, 1)
         throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.n), x has leading dimension $(size(x, 1))"))
     end
 end
 
+function checkstrides(p::FTPlan{T, 1}, x::StridedArray{T}) where T
+    sz = size(x)
+    st = strides(x)
+    if (1, cumprod(sz)...) != (st..., length(x))
+        error("FTPlan requires unit strides, x has strides $(strides(x))")
+    end
+end
+
 for (N, K) in ((2, RECTDISK), (2, TRIANGLE), (3, TETRAHEDRON))
     @eval function checksize(p::FTPlan{T, $N, $K}, x::Array{T, $N}) where T
         if p.n != size(x, 1)
@@ -325,6 +333,14 @@ function checksize(p::AdjointFTPlan, x)
     end
 end
 
+function checkstrides(p::AdjointFTPlan, x)
+    try
+        checkstrides(p.adjoint, x)
+    catch
+        checkstrides(p.parent, x)
+    end
+end
+
 function unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::AdjointFTPlan)
     try
         unsafe_convert(Ptr{ft_plan_struct}, p.adjoint)
@@ -373,6 +389,14 @@ function checksize(p::TransposeFTPlan, x)
     end
 end
 
+function checkstrides(p::TransposeFTPlan, x)
+    try
+        checkstrides(p.transpose, x)
+    catch
+        checkstrides(p.parent, x)
+    end
+end
+
 function unsafe_convert(::Type{Ptr{ft_plan_struct}}, p::TransposeFTPlan)
     try
         unsafe_convert(Ptr{ft_plan_struct}, p.transpose)
@@ -803,18 +827,21 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bfmvf, :Float32),
                        (:lmul!, :ft_bfmv , :Float64),
                        (:ldiv!, :ft_bfsv , :Float64))
     @eval begin
-        function $fJ(p::FTPlan{$elty, 1}, x::Vector{$elty})
+        function $fJ(p::FTPlan{$elty, 1}, x::StridedVector{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'N', p, x)
             return x
         end
-        function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, K}}, x::Vector{$elty}) where K
+        function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, K}}, x::StridedVector{$elty}) where K
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
             return x
         end
-        function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, K}}, x::Vector{$elty}) where K
+        function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, K}}, x::StridedVector{$elty}) where K
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
             return x
         end
@@ -824,18 +851,21 @@ end
 for (fJ, fC, elty) in ((:lmul!, :ft_bbbfmvf, :Float32),
                        (:lmul!, :ft_bbbfmv , :Float64))
     @eval begin
-        function $fJ(p::FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}, x::Vector{$elty})
+        function $fJ(p::FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}, x::StridedVector{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'N', '2', '1', p, x)
             return x
         end
-        function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::Vector{$elty})
+        function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::StridedVector{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', '1', '2', p, x)
             return x
         end
-        function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::Vector{$elty})
+        function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::StridedVector{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', '1', '2', p, x)
             return x
         end
@@ -845,18 +875,21 @@ end
 for (fJ, fC, elty) in ((:lmul!, :ft_mpmv, :Float64),
                        (:ldiv!, :ft_mpsv, :Float64))
     @eval begin
-        function $fJ(p::ModifiedFTPlan{$elty}, x::Vector{$elty})
+        function $fJ(p::ModifiedFTPlan{$elty}, x::StridedVector{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'N', p, x)
             return x
         end
-        function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan{$elty}}, x::Vector{$elty})
+        function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan{$elty}}, x::StridedVector{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
             return x
         end
-        function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan{$elty}}, x::Vector{$elty})
+        function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan{$elty}}, x::StridedVector{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
             return x
         end
@@ -866,18 +899,21 @@ end
 for (fJ, fC) in ((:lmul!, :ft_mpfr_trmv_ptr),
                  (:ldiv!, :ft_mpfr_trsv_ptr))
     @eval begin
-        function $fJ(p::FTPlan{BigFloat, 1}, x::Vector{BigFloat})
+        function $fJ(p::FTPlan{BigFloat, 1}, x::StridedVector{BigFloat})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Int32), 'N', p.n, p, p.n, renew!(x), Base.MPFR.ROUNDING_MODE[])
             return x
         end
-        function $fJ(p::AdjointFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::Vector{BigFloat}) where K
+        function $fJ(p::AdjointFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::StridedVector{BigFloat}) where K
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), Base.MPFR.ROUNDING_MODE[])
             return x
         end
-        function $fJ(p::TransposeFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::Vector{BigFloat}) where K
+        function $fJ(p::TransposeFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::StridedVector{BigFloat}) where K
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), Base.MPFR.ROUNDING_MODE[])
             return x
         end
@@ -889,18 +925,21 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bfmmf, :Float32),
                        (:lmul!, :ft_bfmm , :Float64),
                        (:ldiv!, :ft_bfsm , :Float64))
     @eval begin
-        function $fJ(p::FTPlan{$elty, 1}, x::Matrix{$elty})
+        function $fJ(p::FTPlan{$elty, 1}, x::StridedMatrix{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
             return x
         end
-        function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, K}}, x::Matrix{$elty}) where K
+        function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, K}}, x::StridedMatrix{$elty}) where K
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
             return x
         end
-        function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, K}}, x::Matrix{$elty}) where K
+        function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, K}}, x::StridedMatrix{$elty}) where K
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
             return x
         end
@@ -910,18 +949,21 @@ end
 for (fJ, fC, elty) in ((:lmul!, :ft_bbbfmmf, :Float32),
                        (:lmul!, :ft_bbbfmm , :Float64))
     @eval begin
-        function $fJ(p::FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}, x::Matrix{$elty})
+        function $fJ(p::FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}, x::StridedMatrix{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', '2', '1', p, x, size(x, 1), size(x, 2))
             return x
         end
-        function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::Matrix{$elty})
+        function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::StridedMatrix{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', '1', '2', p, x, size(x, 1), size(x, 2))
             return x
         end
-        function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::Matrix{$elty})
+        function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::StridedMatrix{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', '1', '2', p, x, size(x, 1), size(x, 2))
             return x
         end
@@ -931,18 +973,21 @@ end
 for (fJ, fC, elty) in ((:lmul!, :ft_mpmm, :Float64),
                        (:ldiv!, :ft_mpsm, :Float64))
     @eval begin
-        function $fJ(p::ModifiedFTPlan{$elty}, x::Matrix{$elty})
+        function $fJ(p::ModifiedFTPlan{$elty}, x::StridedMatrix{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
             return x
         end
-        function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan{$elty}}, x::Matrix{$elty})
+        function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan{$elty}}, x::StridedMatrix{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
             return x
         end
-        function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan{$elty}}, x::Matrix{$elty})
+        function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan{$elty}}, x::StridedMatrix{$elty})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
             return x
         end
@@ -952,18 +997,21 @@ end
 for (fJ, fC) in ((:lmul!, :ft_mpfr_trmm_ptr),
                  (:ldiv!, :ft_mpfr_trsm_ptr))
     @eval begin
-        function $fJ(p::FTPlan{BigFloat, 1}, x::Matrix{BigFloat})
+        function $fJ(p::FTPlan{BigFloat, 1}, x::StridedMatrix{BigFloat})
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'N', p.n, p, p.n, renew!(x), size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
             return x
         end
-        function $fJ(p::AdjointFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::Matrix{BigFloat}) where K
+        function $fJ(p::AdjointFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::StridedMatrix{BigFloat}) where K
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
             return x
         end
-        function $fJ(p::TransposeFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::Matrix{BigFloat}) where K
+        function $fJ(p::TransposeFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::StridedMatrix{BigFloat}) where K
             checksize(p, x)
+            checkstrides(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
             return x
         end
@@ -1077,12 +1125,12 @@ function execute_sph_reflection!(p::FTPlan{Float64, 2, SPHERICALISOMETRY}, w, x:
 end
 execute_sph_reflection!(p::FTPlan{Float64, 2, SPHERICALISOMETRY}, w1, w2, w3, x::Matrix{Float64}) = execute_sph_reflection!(p, ft_reflection(w1, w2, w3), x)
 
-*(p::FTPlan{T}, x::Array{Complex{T}}) where T = lmul!(p, deepcopy(x))
-*(p::AdjointFTPlan{T}, x::Array{Complex{T}}) where T = lmul!(p, deepcopy(x))
-*(p::TransposeFTPlan{T}, x::Array{Complex{T}}) where T = lmul!(p, deepcopy(x))
-\(p::FTPlan{T}, x::Array{Complex{T}}) where T = ldiv!(p, deepcopy(x))
-\(p::AdjointFTPlan{T}, x::Array{Complex{T}}) where T = ldiv!(p, deepcopy(x))
-\(p::TransposeFTPlan{T}, x::Array{Complex{T}}) where T = ldiv!(p, deepcopy(x))
+*(p::FTPlan{T}, x::AbstractArray{Complex{T}}) where T = lmul!(p, Array(x))
+*(p::AdjointFTPlan{T}, x::AbstractArray{Complex{T}}) where T = lmul!(p, Array(x))
+*(p::TransposeFTPlan{T}, x::AbstractArray{Complex{T}}) where T = lmul!(p, Array(x))
+\(p::FTPlan{T}, x::AbstractArray{Complex{T}}) where T = ldiv!(p, Array(x))
+\(p::AdjointFTPlan{T}, x::AbstractArray{Complex{T}}) where T = ldiv!(p, Array(x))
+\(p::TransposeFTPlan{T}, x::AbstractArray{Complex{T}}) where T = ldiv!(p, Array(x))
 
 for fJ in (:lmul!, :ldiv!)
     @eval begin

From c82a41c0b2efd91cec50ac873083cc5d686a1631 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub@users.noreply.github.com>
Date: Thu, 4 Aug 2022 00:33:24 +0530
Subject: [PATCH 106/222] clean up whitespaces (#180)

---
 src/PaduaTransform.jl     |  2 +-
 src/chebyshevtransform.jl | 30 +++++++++++++++---------------
 test/chebyshevtests.jl    |  4 ++--
 test/clenshawtests.jl     | 20 ++++++++++----------
 4 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/src/PaduaTransform.jl b/src/PaduaTransform.jl
index 86a88eca..6188e74d 100644
--- a/src/PaduaTransform.jl
+++ b/src/PaduaTransform.jl
@@ -123,7 +123,7 @@ Pre-plan a Padua Transform.
 """
 function plan_paduatransform!(::Type{T},N::Integer,lex) where T
     n=Int(cld(-3+sqrt(1+8N),2))
-    if N ≠ ((n+1)*(n+2))÷2
+    if N ≠ ((n+1)*(n+2))÷2
         error("Padua transforms can only be applied to vectors of length (n+1)*(n+2)/2.")
     end
     PaduaTransformPlan(Array{T}(undef,n+2,n+1),FFTW.plan_r2r!(Array{T}(undef,n+2,n+1),FFTW.REDFT00),lex)
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 0563f26b..9889f728 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -364,7 +364,7 @@ plan_chebyshevutransform(x::AbstractArray, dims...; kws...) = plan_chebyshevutra
 
 @inline function _chebu1_prescale!(_, x::AbstractVector{T}) where T
     n = length(x)
-    for k=1:n # sqrt(1-x_j^2) weight
+    for k=1:n # sqrt(1-x_j^2) weight
         x[k] *= sinpi(one(T)/(2n) + (k-one(T))/n)/n
     end
     x
@@ -372,21 +372,21 @@ end
 
 @inline function _chebu1_postscale!(_, x::AbstractVector{T}) where T
     n = length(x)
-    for k=1:n # sqrt(1-x_j^2) weight
+    for k=1:n # sqrt(1-x_j^2) weight
         x[k] /= sinpi(one(T)/(2n) + (k-one(T))/n)/n
     end
     x
 end
 
 function *(P::ChebyshevUTransformPlan{T,1,K,true}, x::AbstractVector{T}) where {T,K}
-    length(x) ≤ 1 && return x
+    length(x) ≤ 1 && return x
     _chebu1_prescale!(P.plan.region, x)
     P.plan * x
 end
 
 function mul!(y::AbstractVector{T}, P::ChebyshevUTransformPlan{T,1,K,false}, x::AbstractVector{T}) where {T,K}
     n = length(x)
-    length(x) ≤ 1 && return copyto!(y, x)
+    length(x) ≤ 1 && return copyto!(y, x)
     _chebu1_prescale!(P.plan.region, x)
     _plan_mul!(y, P.plan, x)
     _chebu1_postscale!(P.plan.region, x)
@@ -396,7 +396,7 @@ end
 @inline function _chebu2_prescale!(_, x::AbstractVector{T}) where T
     n = length(x)
     c = one(T)/ (n+1)
-    for k=1:n # sqrt(1-x_j^2) weight
+    for k=1:n # sqrt(1-x_j^2) weight
         x[k] *= sinpi(k*c)
     end
     x
@@ -405,7 +405,7 @@ end
 @inline function _chebu2_postscale!(_, x::AbstractVector{T}) where T
     n = length(x)
     c = one(T)/ (n+1)
-    @inbounds for k=1:n # sqrt(1-x_j^2) weight
+    @inbounds for k=1:n # sqrt(1-x_j^2) weight
         x[k] /= sinpi(k*c)
     end
     x
@@ -413,14 +413,14 @@ end
 
 function *(P::ChebyshevUTransformPlan{T,2,K,true}, x::AbstractVector{T}) where {T,K}
     n = length(x)
-    n ≤ 1 && return x
+    n ≤ 1 && return x
     _chebu2_prescale!(P.plan.region, x)
     lmul!(one(T)/ (n+1), P.plan * x)
 end
 
 function mul!(y::AbstractVector{T}, P::ChebyshevUTransformPlan{T,2,K,false}, x::AbstractVector{T}) where {T,K}
     n = length(x)
-    n ≤ 1 && return copyto!(y, x)
+    n ≤ 1 && return copyto!(y, x)
     _chebu2_prescale!(P.plan.region, x)
     _plan_mul!(y, P.plan, x)
     _chebu2_postscale!(P.plan.region, x)
@@ -485,14 +485,14 @@ plan_ichebyshevutransform(x::AbstractArray, dims...; kws...) = plan_ichebyshevut
 
 function _ichebyu1_postscale!(_, x::AbstractVector{T}) where T
     n = length(x)
-    @inbounds for k=1:n # sqrt(1-x_j^2) weight
+    @inbounds for k=1:n # sqrt(1-x_j^2) weight
         x[k] /= 2sinpi(one(T)/(2n) + (k-one(T))/n)
     end
     x
 end
 function *(P::IChebyshevUTransformPlan{T,1,K,true}, x::AbstractVector{T}) where {T<:fftwNumber,K}
     n = length(x)
-    n ≤ 1 && return x
+    n ≤ 1 && return x
 
     x = P.plan * x
     _ichebyu1_postscale!(P.plan.region, x)
@@ -501,7 +501,7 @@ end
 function mul!(y::AbstractVector{T}, P::IChebyshevUTransformPlan{T,1,K,false}, x::AbstractVector{T}) where {T<:fftwNumber,K}
     n = length(x)
     length(y) == n || throw(DimensionMismatch("output must match dimension"))
-    n ≤ 1 && return x
+    n ≤ 1 && return x
 
     _plan_mul!(y, P.plan, x)
     _ichebyu1_postscale!(P.plan.region, y)
@@ -510,7 +510,7 @@ end
 function _ichebu2_rescale!(_, x::AbstractVector{T}) where T
     n = length(x)
     c = one(T)/ (n+1)
-    for k=1:n # sqrt(1-x_j^2) weight
+    for k=1:n # sqrt(1-x_j^2) weight
         x[k] /= sinpi(k*c)
     end
     ldiv!(2, x)
@@ -519,7 +519,7 @@ end
 
 function *(P::IChebyshevUTransformPlan{T,2,K,true}, x::AbstractVector{T}) where {T<:fftwNumber,K}
     n = length(x)
-    n ≤ 1 && return x
+    n ≤ 1 && return x
 
     x = P.plan * x
     _ichebu2_rescale!(P.plan.region, x)
@@ -528,7 +528,7 @@ end
 function mul!(y::AbstractVector{T}, P::IChebyshevUTransformPlan{T,2,K,false}, x::AbstractVector{T}) where {T<:fftwNumber,K}
     n = length(x)
     length(y) == n || throw(DimensionMismatch("output must match dimension"))
-    n ≤ 1 && return x
+    n ≤ 1 && return x
 
     _plan_mul!(y, P.plan, x)
     _ichebu2_rescale!(P.plan.region, y)
@@ -679,4 +679,4 @@ end
 *(P::IChebyshevTransformPlan{T,1,Nothing,true,N,R}, x::AbstractVector{T}) where {T,N,R} =
     copyto!(x, IChebyshevTransformPlan{T,1,Nothing,false,N,R}() * x)
 # *(P::IChebyshevTransformPlan{T,SECONDKIND,false,Nothing}, x::AbstractVector{T}) where T =
-#     IChebyshevTransformPlan{T,SECONDKIND,true,Nothing}() * copy(x)
\ No newline at end of file
+#     IChebyshevTransformPlan{T,SECONDKIND,true,Nothing}() * copy(x)
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index a0e188ad..76ba9ec5 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -191,7 +191,7 @@ using FastTransforms, Test
             @test ichebyshevtransform(X,Val(2),2) ≈ ichebyshevtransform!(copy(X),Val(2),2) ≈ hcat(ichebyshevtransform.([X[k,:] for k=axes(X,1)],Val(2))...)'
 
             @test @inferred(ichebyshevtransform(X)) ≈ @inferred(ichebyshevtransform!(copy(X))) ≈ ichebyshevtransform(ichebyshevtransform(X,1),2)
-            @test @inferred(ichebyshevtransform(X,Val(2))) ≈ @inferred(ichebyshevtransform!(copy(X),Val(2))) ≈ ichebyshevtransform(ichebyshevtransform(X,Val(2),1),Val(2),2)            
+            @test @inferred(ichebyshevtransform(X,Val(2))) ≈ @inferred(ichebyshevtransform!(copy(X),Val(2))) ≈ ichebyshevtransform(ichebyshevtransform(X,Val(2),1),Val(2),2)
 
             @test ichebyshevtransform(chebyshevtransform(X)) ≈ X
             @test chebyshevtransform(ichebyshevtransform(X)) ≈ X
@@ -208,7 +208,7 @@ using FastTransforms, Test
         @test chebyshevtransform([1,2,3], Val(2)) == chebyshevtransform([1.,2,3], Val(2))
         @test ichebyshevtransform([1,2,3]) == ichebyshevtransform([1.,2,3])
         @test ichebyshevtransform([1,2,3], Val(2)) == ichebyshevtransform([1.,2,3], Val(2))
-        
+
         @test chebyshevutransform([1,2,3]) == chebyshevutransform([1.,2,3])
         @test chebyshevutransform([1,2,3], Val(2)) == chebyshevutransform([1.,2,3], Val(2))
         @test ichebyshevutransform([1,2,3]) == ichebyshevutransform([1.,2,3])
diff --git a/test/clenshawtests.jl b/test/clenshawtests.jl
index 14485c38..f4e0798d 100644
--- a/test/clenshawtests.jl
+++ b/test/clenshawtests.jl
@@ -14,9 +14,9 @@ import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrenc
             @test @inferred(clenshaw(elty[],1)) ≡ zero(elty)
 
             x = elty[1,0,0.1]
-            @test @inferred(clenshaw(c,x)) ≈ @inferred(clenshaw!(c,copy(x))) ≈ 
+            @test @inferred(clenshaw(c,x)) ≈ @inferred(clenshaw!(c,copy(x))) ≈
                 @inferred(clenshaw!(c,x,similar(x))) ≈
-                @inferred(clenshaw(cf,x)) ≈ @inferred(clenshaw!(cf,copy(x))) ≈ 
+                @inferred(clenshaw(cf,x)) ≈ @inferred(clenshaw!(cf,copy(x))) ≈
                 @inferred(clenshaw!(cf,x,similar(x))) ≈ elty[6,-2,-1.74]
 
             @testset "Strided" begin
@@ -34,7 +34,7 @@ import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrenc
             @testset "matrix coefficients" begin
                 c = [1 2; 3 4; 5 6]
                 @test clenshaw(c,0.1) ≈ [clenshaw(c[:,1],0.1), clenshaw(c[:,2],0.1)]
-                @test clenshaw(c,[0.1,0.2]) ≈ [clenshaw(c[:,1], 0.1) clenshaw(c[:,2], 0.1); clenshaw(c[:,1], 0.2) clenshaw(c[:,2], 0.2)] 
+                @test clenshaw(c,[0.1,0.2]) ≈ [clenshaw(c[:,1], 0.1) clenshaw(c[:,2], 0.1); clenshaw(c[:,1], 0.2) clenshaw(c[:,2], 0.2)]
             end
         end
     end
@@ -45,19 +45,19 @@ import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrenc
         @testset "forwardrecurrence!" begin
             @test @inferred(forwardrecurrence(N, A, B, C, 1)) == @inferred(forwardrecurrence!(Vector{Int}(undef,N), A, B, C, 1)) == 1:N
             @test forwardrecurrence!(Vector{Int}(undef,N), A, B, C, -1) == (-1) .^ (0:N-1) .* (1:N)
-            @test forwardrecurrence(N, A, B, C, 0.1) ≈ forwardrecurrence!(Vector{Float64}(undef,N), A, B, C, 0.1) ≈ 
+            @test forwardrecurrence(N, A, B, C, 0.1) ≈ forwardrecurrence!(Vector{Float64}(undef,N), A, B, C, 0.1) ≈
                     sin.((1:N) .* acos(0.1)) ./ sqrt(1-0.1^2)
         end
 
         c = [1,2,3]
-        @test c'forwardrecurrence(3, A, B, C, 0.1) ≈ clenshaw([1,2,3], A, B, C, 0.1) ≈ 
+        @test c'forwardrecurrence(3, A, B, C, 0.1) ≈ clenshaw([1,2,3], A, B, C, 0.1) ≈
             1 + (2sin(2acos(0.1)) + 3sin(3acos(0.1)))/sqrt(1-0.1^2)
 
         @testset "matrix coefficients" begin
             c = [1 2; 3 4; 5 6]
             @test clenshaw(c,A,B,C,0.1) ≈ [clenshaw(c[:,1],A,B,C,0.1), clenshaw(c[:,2],A,B,C,0.1)]
-            @test clenshaw(c,A,B,C,[0.1,0.2]) ≈ [clenshaw(c[:,1], A,B,C,0.1) clenshaw(c[:,2], A,B,C,0.1); clenshaw(c[:,1], A,B,C,0.2) clenshaw(c[:,2], A,B,C,0.2)] 
-        end            
+            @test clenshaw(c,A,B,C,[0.1,0.2]) ≈ [clenshaw(c[:,1], A,B,C,0.1) clenshaw(c[:,2], A,B,C,0.1); clenshaw(c[:,1], A,B,C,0.2) clenshaw(c[:,2], A,B,C,0.2)]
+        end
     end
 
     @testset "Chebyshev-as-general" begin
@@ -79,10 +79,10 @@ import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrenc
         @test @inferred(clenshaw(Float64[], A, B, C, 1)) ≡ 0.0
 
         x = [1,0,0.1]
-        @test @inferred(clenshaw(c, A, B, C, x)) ≈ @inferred(clenshaw!(c, A, B, C, copy(x))) ≈ 
+        @test @inferred(clenshaw(c, A, B, C, x)) ≈ @inferred(clenshaw!(c, A, B, C, copy(x))) ≈
             @inferred(clenshaw!(c, A, B, C, x, one.(x), similar(x))) ≈
             @inferred(clenshaw!(cf, Af, Bf, Cf, x, one.(x),similar(x))) ≈
-            @inferred(clenshaw([1.,2,3], A, B, C, x)) ≈ 
+            @inferred(clenshaw([1.,2,3], A, B, C, x)) ≈
             @inferred(clenshaw!([1.,2,3], A, B, C, copy(x))) ≈ [6,-2,-1.74]
     end
 
@@ -136,4 +136,4 @@ import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrenc
         c = randn(N)
         @test clenshaw(c, A, B, C, 0.1) == clenshaw(c, A, Vector(B), C, 0.1)
     end
-end
\ No newline at end of file
+end

From 67b345e9601edb3f3a509c23a504f0a3172f7887 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub@users.noreply.github.com>
Date: Thu, 4 Aug 2022 01:37:36 +0530
Subject: [PATCH 107/222] Enum of transforms (#181)

---
 Project.toml             |  2 +-
 src/libfasttransforms.jl | 81 +++++++++++++++++++++-------------------
 2 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/Project.toml b/Project.toml
index 8ed12314..9d76b9ed 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.1"
+version = "0.14.2"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index f7a44189..b97f9991 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -103,44 +103,47 @@ function clenshaw!(c::StridedVector{Float32}, A::Vector{Float32}, B::Vector{Floa
     f
 end
 
-const LEG2CHEB              = 0
-const CHEB2LEG              = 1
-const ULTRA2ULTRA           = 2
-const JAC2JAC               = 3
-const LAG2LAG               = 4
-const JAC2ULTRA             = 5
-const ULTRA2JAC             = 6
-const JAC2CHEB              = 7
-const CHEB2JAC              = 8
-const ULTRA2CHEB            = 9
-const CHEB2ULTRA           = 10
-const ASSOCIATEDJAC2JAC    = 11
-const MODIFIEDJAC2JAC      = 12
-const MODIFIEDLAG2LAG      = 13
-const MODIFIEDHERM2HERM    = 14
-const SPHERE               = 15
-const SPHEREV              = 16
-const DISK                 = 17
-const RECTDISK             = 18
-const TRIANGLE             = 19
-const TETRAHEDRON          = 20
-const SPINSPHERE           = 21
-const SPHERESYNTHESIS      = 22
-const SPHEREANALYSIS       = 23
-const SPHEREVSYNTHESIS     = 24
-const SPHEREVANALYSIS      = 25
-const DISKSYNTHESIS        = 26
-const DISKANALYSIS         = 27
-const RECTDISKSYNTHESIS    = 28
-const RECTDISKANALYSIS     = 29
-const TRIANGLESYNTHESIS    = 30
-const TRIANGLEANALYSIS     = 31
-const TETRAHEDRONSYNTHESIS = 32
-const TETRAHEDRONANALYSIS  = 33
-const SPINSPHERESYNTHESIS  = 34
-const SPINSPHEREANALYSIS   = 35
-const SPHERICALISOMETRY    = 36
-
+@enum Transforms::Cint begin  # kinds of transform; each member is backed by a Cint
+    LEG2CHEB=0  # the only explicit value: every following member is the previous one plus 1
+    CHEB2LEG
+    ULTRA2ULTRA
+    JAC2JAC
+    LAG2LAG
+    JAC2ULTRA
+    ULTRA2JAC
+    JAC2CHEB
+    CHEB2JAC
+    ULTRA2CHEB
+    CHEB2ULTRA
+    ASSOCIATEDJAC2JAC
+    MODIFIEDJAC2JAC
+    MODIFIEDLAG2LAG
+    MODIFIEDHERM2HERM
+    SPHERE
+    SPHEREV
+    DISK
+    RECTDISK
+    TRIANGLE
+    TETRAHEDRON
+    SPINSPHERE
+    SPHERESYNTHESIS
+    SPHEREANALYSIS
+    SPHEREVSYNTHESIS
+    SPHEREVANALYSIS
+    DISKSYNTHESIS
+    DISKANALYSIS
+    RECTDISKSYNTHESIS
+    RECTDISKANALYSIS
+    TRIANGLESYNTHESIS
+    TRIANGLEANALYSIS
+    TETRAHEDRONSYNTHESIS
+    TETRAHEDRONANALYSIS
+    SPINSPHERESYNTHESIS
+    SPINSPHEREANALYSIS
+    SPHERICALISOMETRY  # 36, the same value as the integer constant this enum replaces
+end
+
+Transforms(t::Transforms) = t  # identity method: lets Transforms(k) accept an existing Transforms as well as an Integer (kind2string calls it with either)
 
 let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                CHEB2LEG             => "Chebyshev--Legendre",
@@ -180,7 +183,7 @@ let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                SPINSPHEREANALYSIS   => "FFTW Fourier analysis on the sphere (spin-weighted)",
                SPHERICALISOMETRY    => "Spherical isometry")
     global kind2string
-    kind2string(k::Integer) = k2s[Int(k)]
+    kind2string(k::Union{Integer, Transforms}) = k2s[Transforms(k)]
 end
 
 struct ft_plan_struct end

From a80a47d52616c8b34a5042f60d0c3a256bf8b944 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 3 Aug 2022 14:16:52 -0600
Subject: [PATCH 108/222] add tests from #179 (#183)

---
 test/libfasttransformstests.jl | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 8d57f013..5c80aaad 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -28,6 +28,9 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         y = p1*x
         z = p2*y
         @test z ≈ x
+        y = p1*view(x, :)
+        z = p2*view(y, :)
+        @test z ≈ x
         y = p1*x
         z = p1'y
         y = transpose(p1)*z
@@ -35,6 +38,13 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         y = p1'\z
         z = p1\y
         @test z ≈ x
+        y = p1*view(x, :)
+        z = p1'view(y, :)
+        y = transpose(p1)*view(z, :)
+        z = transpose(p1)\view(y, :)
+        y = p1'\view(z, :)
+        z = p1\view(y, :)
+        @test z ≈ x
         y = p2*x
         z = p2'y
         y = transpose(p2)*z
@@ -42,6 +52,13 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         y = p2'\z
         z = p2\y
         @test z ≈ x
+        y = p2*view(x, :)
+        z = p2'view(y, :)
+        y = transpose(p2)*view(z, :)
+        z = transpose(p2)\view(y, :)
+        y = p2'\view(z, :)
+        z = p2\view(y, :)
+        @test z ≈ x
         P = p1*I
         Q = p2*P
         @test Q ≈ I

From e1d608686bc4db8cb8efb4da62753c5580513bc1 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 3 Aug 2022 14:46:18 -0600
Subject: [PATCH 109/222] farm out BigFloat FFT to GenericFFT

preserve `conv` pirating until DSP's PR gets merged and tagged https://github.com/JuliaDSP/DSP.jl/pull/477
---
 Project.toml             |   2 +
 src/FastTransforms.jl    |   1 +
 src/fftBigFloat.jl       | 349 +--------------------------------------
 test/fftBigFloattests.jl | 110 +-----------
 4 files changed, 9 insertions(+), 453 deletions(-)

diff --git a/Project.toml b/Project.toml
index 9d76b9ed..20f2dbbb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -9,6 +9,7 @@ FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
 FastTransforms_jll = "34b6f7d7-08f9-5794-9e10-3819e4c7e49a"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
+GenericFFT = "a8297547-1b15-4a5a-a998-a2ac5f1cef28"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
@@ -23,6 +24,7 @@ FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.6.0"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
+GenericFFT = "0.1"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
 ToeplitzMatrices = "0.6, 0.7"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 40d7dc16..9c1cddc6 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -7,6 +7,7 @@ import DSP
 
 @reexport using AbstractFFTs
 @reexport using FFTW
+@reexport using GenericFFT
 
 import Base: convert, unsafe_convert, eltype, ndims, adjoint, transpose, show,
              *, \, inv, length, size, view, getindex
diff --git a/src/fftBigFloat.jl b/src/fftBigFloat.jl
index 64b7807e..e67f1143 100644
--- a/src/fftBigFloat.jl
+++ b/src/fftBigFloat.jl
@@ -1,345 +1,4 @@
-const AbstractFloats = Union{AbstractFloat,Complex{T} where T<:AbstractFloat}
-
-# We use these type definitions for clarity
-const RealFloats = T where T<:AbstractFloat
-const ComplexFloats = Complex{T} where T<:AbstractFloat
-
-
-# The following implements Bluestein's algorithm, following http://www.dsprelated.com/dspbooks/mdft/Bluestein_s_FFT_Algorithm.html
-# To add more types, add them in the union of the function's signature.
-
-function generic_fft(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
-    region == 1 && (ret = generic_fft(x))
-    ret
-end
-
-function generic_fft!(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
-    region == 1 && (x[:] .= generic_fft(x))
-    x
-end
-
-function generic_fft(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
-    region == 1:1 && (ret = generic_fft(x))
-    ret
-end
-
-function generic_fft!(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
-    region == 1:1 && (x[:] .= generic_fft(x))
-    x
-end
-
-function generic_fft(x::StridedMatrix{T}, region::Integer) where T<:AbstractFloats
-    if region == 1
-        ret = hcat([generic_fft(x[:, j]) for j in 1:size(x, 2)]...)
-    end
-    ret
-end
-
-function generic_fft!(x::StridedMatrix{T}, region::Integer) where T<:AbstractFloats
-    if region == 1
-        for j in 1:size(x, 2)
-            x[:, j] .= generic_fft(x[:, j])
-        end
-    end
-    x
-end
-
-function generic_fft(x::Vector{T}) where T<:AbstractFloats
-    T <: FFTW.fftwNumber && (@warn("Using generic fft for FFTW number type."))
-    n = length(x)
-    ispow2(n) && return generic_fft_pow2(x)
-    ks = range(zero(real(T)),stop=n-one(real(T)),length=n)
-    Wks = exp.((-im).*convert(T,π).*ks.^2 ./ n)
-    xq, wq = x.*Wks, conj([exp(-im*convert(T,π)*n);reverse(Wks);Wks[2:end]])
-    return Wks.*_conv!(xq,wq)[n+1:2n]
-end
-
-generic_bfft(x::StridedArray{T, N}, region) where {T <: AbstractFloats, N} = conj!(generic_fft(conj(x), region))
-generic_bfft!(x::StridedArray{T, N}, region) where {T <: AbstractFloats, N} = conj!(generic_fft!(conj!(x), region))
-generic_ifft(x::StridedArray{T, N}, region) where {T<:AbstractFloats, N} = ldiv!(length(x), conj!(generic_fft(conj(x), region)))
-generic_ifft!(x::StridedArray{T, N}, region) where {T<:AbstractFloats, N} = ldiv!(length(x), conj!(generic_fft!(conj!(x), region)))
-
-generic_rfft(v::Vector{T}, region) where T<:AbstractFloats = generic_fft(v, region)[1:div(length(v),2)+1]
-function generic_irfft(v::Vector{T}, n::Integer, region) where T<:ComplexFloats
-    @assert n==2length(v)-1
-    r = Vector{T}(undef, n)
-    r[1:length(v)]=v
-    r[length(v)+1:end]=reverse(conj(v[2:end]))
-    real(generic_ifft(r, region))
-end
-generic_brfft(v::StridedArray, n::Integer, region) = generic_irfft(v, n, region)*n
-
-function _conv!(u::StridedVector{T}, v::StridedVector{T}) where T<:AbstractFloats
-    nu = length(u)
-    nv = length(v)
-    n = nu + nv - 1
-    np2 = nextpow(2, n)
-    append!(u, zeros(T, np2-nu))
-    append!(v, zeros(T, np2-nv))
-    y = generic_ifft_pow2(generic_fft_pow2(u).*generic_fft_pow2(v))
-    #TODO This would not handle Dual/ComplexDual numbers correctly
-    y = T<:Real ? real(y[1:n]) : y[1:n]
-end
-
-conv(u::AbstractArray{T, N}, v::AbstractArray{T, N}) where {T<:AbstractFloat, N} = _conv!(deepcopy(u), deepcopy(v))
-conv(u::AbstractArray{T, N}, v::AbstractArray{Complex{T}, N}) where {T<:AbstractFloat, N} = _conv!(complex(deepcopy(u)), deepcopy(v))
-conv(u::AbstractArray{Complex{T}, N}, v::AbstractArray{T, N}) where {T<:AbstractFloat, N} = _conv!(deepcopy(u), complex(deepcopy(v)))
-conv(u::AbstractArray{Complex{T}, N}, v::AbstractArray{Complex{T}, N}) where {T<:AbstractFloat, N} = _conv!(deepcopy(u), deepcopy(v))
-
-# This is a Cooley-Tukey FFT algorithm inspired by many widely available algorithms including:
-# c_radix2.c in the GNU Scientific Library and four1 in the Numerical Recipes in C.
-# However, the trigonometric recurrence is improved for greater efficiency.
-# The algorithm starts with bit-reversal, then divides and conquers in-place.
-function generic_fft_pow2!(x::Vector{T}) where T<:AbstractFloat
-    n,big2=length(x),2one(T)
-    nn,j=n÷2,1
-    for i=1:2:n-1
-        if j>i
-            x[j], x[i] = x[i], x[j]
-            x[j+1], x[i+1] = x[i+1], x[j+1]
-        end
-        m = nn
-        while m ≥ 2 && j > m
-            j -= m
-            m = m÷2
-        end
-        j += m
-    end
-    logn = 2
-    while logn < n
-        θ=-big2/logn
-        wtemp = sinpi(θ/2)
-        wpr, wpi = -2wtemp^2, sinpi(θ)
-        wr, wi = one(T), zero(T)
-        for m=1:2:logn-1
-            for i=m:2logn:n
-                j=i+logn
-                mixr, mixi = wr*x[j]-wi*x[j+1], wr*x[j+1]+wi*x[j]
-                x[j], x[j+1] = x[i]-mixr, x[i+1]-mixi
-                x[i], x[i+1] = x[i]+mixr, x[i+1]+mixi
-            end
-            wr = (wtemp=wr)*wpr-wi*wpi+wr
-            wi = wi*wpr+wtemp*wpi+wi
-        end
-        logn = logn << 1
-    end
-    return x
-end
-
-function generic_fft_pow2(x::Vector{Complex{T}}) where T<:AbstractFloat
-    y = interlace(real(x), imag(x))
-    generic_fft_pow2!(y)
-    return complex.(y[1:2:end], y[2:2:end])
-end
-generic_fft_pow2(x::Vector{T}) where T<:AbstractFloat = generic_fft_pow2(complex(x))
-
-function generic_ifft_pow2(x::Vector{Complex{T}}) where T<:AbstractFloat
-    y = interlace(real(x), -imag(x))
-    generic_fft_pow2!(y)
-    return ldiv!(length(x), conj!(complex.(y[1:2:end], y[2:2:end])))
-end
-
-function generic_dct(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
-    region == 1 && (ret = generic_dct(x))
-    ret
-end
-
-function generic_dct!(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
-    region == 1 && (x[:] .= generic_dct(x))
-    x
-end
-
-function generic_idct(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
-    region == 1 && (ret = generic_idct(x))
-    ret
-end
-
-function generic_idct!(x::StridedVector{T}, region::Integer) where T<:AbstractFloats
-    region == 1 && (x[:] .= generic_idct(x))
-    x
-end
-
-function generic_dct(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
-    region == 1:1 && (ret = generic_dct(x))
-    ret
-end
-
-function generic_dct!(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
-    region == 1:1 && (x[:] .= generic_dct(x))
-    x
-end
-
-function generic_idct(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
-    region == 1:1 && (ret = generic_idct(x))
-    ret
-end
-
-function generic_idct!(x::StridedVector{T}, region::UnitRange{I}) where {T<:AbstractFloats, I<:Integer}
-    region == 1:1 && (x[:] .= generic_idct(x))
-    x
-end
-
-function generic_dct(a::AbstractVector{Complex{T}}) where {T <: AbstractFloat}
-    T <: FFTW.fftwNumber && (@warn("Using generic dct for FFTW number type."))
-    N = length(a)
-    twoN = convert(T,2) * N
-    c = generic_fft([a; reverse(a, dims=1)]) # c = generic_fft([a; flipdim(a,1)])
-    d = c[1:N]
-    d .*= exp.((-im*convert(T, pi)).*(0:N-1)./twoN)
-    d[1] = d[1] / sqrt(convert(T, 2))
-    lmul!(inv(sqrt(twoN)), d)
-end
-
-generic_dct(a::AbstractArray{T}) where {T <: AbstractFloat} = real(generic_dct(complex(a)))
-
-function generic_idct(a::AbstractVector{Complex{T}}) where {T <: AbstractFloat}
-    T <: FFTW.fftwNumber && (@warn("Using generic idct for FFTW number type."))
-    N = length(a)
-    twoN = convert(T,2)*N
-    b = a * sqrt(twoN)
-    b[1] = b[1] * sqrt(convert(T,2))
-    shift = exp.(-im * 2 * convert(T, pi) * (N - convert(T,1)/2) * (0:(2N-1)) / twoN)
-    b = [b; 0; -reverse(b[2:end], dims=1)] .* shift # b = [b; 0; -flipdim(b[2:end],1)] .* shift
-    c = ifft(b)
-    reverse(c[1:N]; dims=1)#flipdim(c[1:N],1)
-end
-
-generic_idct(a::AbstractArray{T}) where {T <: AbstractFloat} = real(generic_idct(complex(a)))
-
-
-# These lines mimick the corresponding ones in FFTW/src/dct.jl, but with
-# AbstractFloat rather than fftwNumber.
-for f in (:dct, :dct!, :idct, :idct!)
-    pf = Symbol("plan_", f)
-    @eval begin
-        $f(x::AbstractArray{<:AbstractFloats}) = $pf(x) * x
-        $f(x::AbstractArray{<:AbstractFloats}, region) = $pf(x, region) * x
-    end
-end
-
-# dummy plans
-abstract type DummyPlan{T} <: Plan{T} end
-for P in (:DummyFFTPlan, :DummyiFFTPlan, :DummybFFTPlan, :DummyDCTPlan, :DummyiDCTPlan)
-    # All plans need an initially undefined pinv field
-    @eval begin
-        mutable struct $P{T,inplace,G} <: DummyPlan{T}
-            region::G # region (iterable) of dims that are transformed
-            pinv::DummyPlan{T}
-            $P{T,inplace,G}(region::G) where {T<:AbstractFloats, inplace, G} = new(region)
-        end
-    end
-end
-for P in (:DummyrFFTPlan, :DummyirFFTPlan, :DummybrFFTPlan)
-    @eval begin
-        mutable struct $P{T,inplace,G} <: DummyPlan{T}
-            n::Integer
-            region::G # region (iterable) of dims that are transformed
-            pinv::DummyPlan{T}
-            $P{T,inplace,G}(n::Integer, region::G) where {T<:AbstractFloats, inplace, G} = new(n, region)
-        end
-    end
-end
-
-for (Plan,iPlan) in ((:DummyFFTPlan,:DummyiFFTPlan),
-                     (:DummyDCTPlan,:DummyiDCTPlan))
-   @eval begin
-       plan_inv(p::$Plan{T,inplace,G}) where {T,inplace,G} = $iPlan{T,inplace,G}(p.region)
-       plan_inv(p::$iPlan{T,inplace,G}) where {T,inplace,G} = $Plan{T,inplace,G}(p.region)
-    end
-end
-
-# Specific for rfft, irfft and brfft:
-plan_inv(p::DummyirFFTPlan{T,inplace,G}) where {T,inplace,G} = DummyrFFTPlan{T,Inplace,G}(p.n, p.region)
-plan_inv(p::DummyrFFTPlan{T,inplace,G}) where {T,inplace,G} = DummyirFFTPlan{T,Inplace,G}(p.n, p.region)
-
-
-
-for (Plan,ff,ff!) in ((:DummyFFTPlan,:generic_fft,:generic_fft!),
-                      (:DummybFFTPlan,:generic_bfft,:generic_bfft!),
-                      (:DummyiFFTPlan,:generic_ifft,:generic_ifft!),
-                      (:DummyrFFTPlan,:generic_rfft,:generic_rfft!),
-                      (:DummyDCTPlan,:generic_dct,:generic_dct!),
-                      (:DummyiDCTPlan,:generic_idct,:generic_idct!))
-    @eval begin
-        *(p::$Plan{T,true}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = $ff!(x, p.region)
-        *(p::$Plan{T,false}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = $ff(x, p.region)
-        function mul!(C::StridedVector, p::$Plan, x::StridedVector)
-            C[:] = $ff(x, p.region)
-            C
-        end
-    end
-end
-
-# Specific for irfft and brfft:
-*(p::DummyirFFTPlan{T,true}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_irfft!(x, p.n, p.region)
-*(p::DummyirFFTPlan{T,false}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_irfft(x, p.n, p.region)
-function mul!(C::StridedVector, p::DummyirFFTPlan, x::StridedVector)
-    C[:] = generic_irfft(x, p.n, p.region)
-    C
-end
-*(p::DummybrFFTPlan{T,true}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_brfft!(x, p.n, p.region)
-*(p::DummybrFFTPlan{T,false}, x::StridedArray{T,N}) where {T<:AbstractFloats,N} = generic_brfft(x, p.n, p.region)
-function mul!(C::StridedVector, p::DummybrFFTPlan, x::StridedVector)
-    C[:] = generic_brfft(x, p.n, p.region)
-    C
-end
-
-
-# We override these for AbstractFloat, so that conversion from reals to
-# complex numbers works for any AbstractFloat (instead of only BlasFloat's)
-AbstractFFTs.complexfloat(x::StridedArray{Complex{<:AbstractFloat}}) = x
-AbstractFFTs.realfloat(x::StridedArray{<:Real}) = x
-# We override this one in order to avoid throwing an error that the type is
-# unsupported (as defined in AbstractFFTs)
-AbstractFFTs._fftfloat(::Type{T}) where {T <: AbstractFloat} = T
-
-
-# We intercept the calls to plan_X(x, region) below.
-# In order not to capture any calls that should go to FFTW, we have to be
-# careful about the typing, so that the calls to FFTW remain more specific.
-# This is the reason for using StridedArray below. We also have to carefully
-# distinguish between real and complex arguments.
-
-plan_fft(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyFFTPlan{Complex{real(T)},false,typeof(region)}(region)
-plan_fft!(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyFFTPlan{Complex{real(T)},true,typeof(region)}(region)
-
-plan_bfft(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummybFFTPlan{Complex{real(T)},false,typeof(region)}(region)
-plan_bfft!(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummybFFTPlan{Complex{real(T)},true,typeof(region)}(region)
-
-# The ifft plans are automatically provided in terms of the bfft plans above.
-# plan_ifft(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyiFFTPlan{Complex{real(T)},false,typeof(region)}(region)
-# plan_ifft!(x::StridedArray{T}, region) where {T <: ComplexFloats} = DummyiFFTPlan{Complex{real(T)},true,typeof(region)}(region)
-
-plan_dct(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyDCTPlan{T,false,typeof(region)}(region)
-plan_dct!(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyDCTPlan{T,true,typeof(region)}(region)
-
-plan_idct(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyiDCTPlan{T,false,typeof(region)}(region)
-plan_idct!(x::StridedArray{T}, region) where {T <: AbstractFloats} = DummyiDCTPlan{T,true,typeof(region)}(region)
-
-plan_rfft(x::StridedArray{T}, region) where {T <: RealFloats} = DummyrFFTPlan{Complex{real(T)},false,typeof(region)}(length(x), region)
-plan_brfft(x::StridedArray{T}, n::Integer, region) where {T <: ComplexFloats} = DummybrFFTPlan{Complex{real(T)},false,typeof(region)}(n, region)
-
-# A plan for irfft is created in terms of a plan for brfft.
-# plan_irfft(x::StridedArray{T}, n::Integer, region) where {T <: ComplexFloats} = DummyirFFTPlan{Complex{real(T)},false,typeof(region)}(n, region)
-
-# These don't exist for now:
-# plan_rfft!(x::StridedArray{T}) where {T <: RealFloats} = DummyrFFTPlan{Complex{real(T)},true}()
-# plan_irfft!(x::StridedArray{T},n::Integer) where {T <: RealFloats} = DummyirFFTPlan{Complex{real(T)},true}()
-
-function interlace(a::Vector{S},b::Vector{V}) where {S<:Number,V<:Number}
-    na=length(a);nb=length(b)
-    T=promote_type(S,V)
-    if nb≥na
-        ret=zeros(T,2nb)
-        ret[1:2:1+2*(na-1)]=a
-        ret[2:2:end]=b
-        ret
-    else
-        ret=zeros(T,2na-1)
-        ret[1:2:end]=a
-        if !isempty(b)
-            ret[2:2:2+2*(nb-1)]=b
-        end
-        ret
-    end
-end
+conv(u::AbstractArray{T, N}, v::AbstractArray{T, N}) where {T<:AbstractFloat, N} = GenericFFT._conv!(deepcopy(u), deepcopy(v))  # real with real; deepcopy keeps the caller's arrays untouched (assumes _conv! mutates its arguments — TODO confirm)
+conv(u::AbstractArray{T, N}, v::AbstractArray{Complex{T}, N}) where {T<:AbstractFloat, N} = GenericFFT._conv!(complex(deepcopy(u)), deepcopy(v))  # real u is promoted with complex() to match complex v
+conv(u::AbstractArray{Complex{T}, N}, v::AbstractArray{T, N}) where {T<:AbstractFloat, N} = GenericFFT._conv!(deepcopy(u), complex(deepcopy(v)))  # real v is promoted with complex() to match complex u
+conv(u::AbstractArray{Complex{T}, N}, v::AbstractArray{Complex{T}, N}) where {T<:AbstractFloat, N} = GenericFFT._conv!(deepcopy(u), deepcopy(v))  # complex with complex
diff --git a/test/fftBigFloattests.jl b/test/fftBigFloattests.jl
index 2dfd2988..04040571 100644
--- a/test/fftBigFloattests.jl
+++ b/test/fftBigFloattests.jl
@@ -1,115 +1,9 @@
-using DSP, FFTW, FastTransforms, LinearAlgebra, Test
-
-@testset "BigFloat FFT and DCT" begin
-
-    c = collect(range(-big(1.0),stop=1,length=16))
-    @test norm(fft(c) - fft(Float64.(c))) < 3Float64(norm(c))*eps()
-    @test norm(ifft(c) - ifft(Float64.(c))) < 3Float64(norm(c))*eps()
-
-    c = collect(range(-big(1.0),stop=1.0,length=201))
-    @test norm(ifft(fft(c))-c) < 200norm(c)eps(BigFloat)
+using DSP, FastTransforms, Test
 
+@testset "BigFloat Convolution" begin
     s = big(1) ./ (1:10)
     s64 = Float64.(s)
     @test Float64.(conv(s, s)) ≈ conv(s64, s64)
     @test s == big(1) ./ (1:10) #67, ensure conv doesn't overwrite input
     @test all(s64 .=== Float64.(big(1) ./ (1:10)))
-
-    p = plan_dct(c)
-    @test norm(FastTransforms.generic_dct(c) - p*c) == 0
-
-    pi = plan_idct!(c)
-    @test norm(pi*dct(c) - c) < 1000norm(c)*eps(BigFloat)
-
-    @test norm(dct(c)-dct(map(Float64,c)),Inf) < 10eps()
-
-    cc = cis.(c)
-    @test norm(dct(cc)-dct(map(Complex{Float64},cc)),Inf) < 10eps()
-
-    c = big.(rand(100)) + im*big.(rand(100))
-    @test norm(dct(c)-dct(map(ComplexF64,c)),Inf) < 10eps()
-    @test norm(idct(c)-idct(map(ComplexF64,c)),Inf) < 10eps()
-    @test norm(idct(dct(c))-c,Inf) < 1000eps(BigFloat)
-    @test norm(dct(idct(c))-c,Inf) < 1000eps(BigFloat)
-
-    c = randn(ComplexF16, 20)
-    p = plan_fft(c)
-    @test inv(p) * (p * c) ≈ c
-
-    c = randn(ComplexF16, 20)
-    pinpl = plan_fft!(c)
-    @test inv(pinpl) * (pinpl * c) ≈ c
-
-    # Make sure we don't accidentally hijack any FFTW plans
-    for T in (Float32, Float64)
-        @test plan_fft(rand(BigFloat,10)) isa FastTransforms.DummyPlan
-        @test plan_fft(rand(BigFloat,10), 1:1) isa FastTransforms.DummyPlan
-        @test plan_fft(rand(Complex{BigFloat},10)) isa FastTransforms.DummyPlan
-        @test plan_fft(rand(Complex{BigFloat},10), 1:1) isa FastTransforms.DummyPlan
-        @test plan_fft!(rand(Complex{BigFloat},10)) isa FastTransforms.DummyPlan
-        @test plan_fft!(rand(Complex{BigFloat},10), 1:1) isa FastTransforms.DummyPlan
-        @test !( plan_fft(rand(T,10)) isa FastTransforms.DummyPlan )
-        @test !( plan_fft(rand(T,10), 1:1) isa FastTransforms.DummyPlan )
-        @test !( plan_fft(rand(Complex{T},10)) isa FastTransforms.DummyPlan )
-        @test !( plan_fft(rand(Complex{T},10), 1:1) isa FastTransforms.DummyPlan )
-        @test !( plan_fft!(rand(Complex{T},10)) isa FastTransforms.DummyPlan )
-        @test !( plan_fft!(rand(Complex{T},10), 1:1) isa FastTransforms.DummyPlan )
-
-        @test plan_ifft(rand(T,10)) isa FFTW.ScaledPlan
-        @test plan_ifft(rand(T,10), 1:1) isa FFTW.ScaledPlan
-        @test plan_ifft(rand(Complex{T},10)) isa FFTW.ScaledPlan
-        @test plan_ifft(rand(Complex{T},10), 1:1) isa FFTW.ScaledPlan
-        @test plan_ifft!(rand(Complex{T},10)) isa FFTW.ScaledPlan
-        @test plan_ifft!(rand(Complex{T},10), 1:1) isa FFTW.ScaledPlan
-
-        @test plan_bfft(rand(BigFloat,10)) isa FastTransforms.DummyPlan
-        @test plan_bfft(rand(BigFloat,10), 1:1) isa FastTransforms.DummyPlan
-        @test plan_bfft(rand(Complex{BigFloat},10)) isa FastTransforms.DummyPlan
-        @test plan_bfft(rand(Complex{BigFloat},10), 1:1) isa FastTransforms.DummyPlan
-        @test plan_bfft!(rand(Complex{BigFloat},10)) isa FastTransforms.DummyPlan
-        @test plan_bfft!(rand(Complex{BigFloat},10), 1:1) isa FastTransforms.DummyPlan
-        @test !( plan_bfft(rand(T,10)) isa FastTransforms.DummyPlan )
-        @test !( plan_bfft(rand(T,10), 1:1) isa FastTransforms.DummyPlan )
-        @test !( plan_bfft(rand(Complex{T},10)) isa FastTransforms.DummyPlan )
-        @test !( plan_bfft(rand(Complex{T},10), 1:1) isa FastTransforms.DummyPlan )
-        @test !( plan_bfft!(rand(Complex{T},10)) isa FastTransforms.DummyPlan )
-        @test !( plan_bfft!(rand(Complex{T},10), 1:1) isa FastTransforms.DummyPlan )
-
-        @test plan_dct(rand(BigFloat,10)) isa FastTransforms.DummyPlan
-        @test plan_dct(rand(BigFloat,10), 1:1) isa FastTransforms.DummyPlan
-        @test plan_dct(rand(Complex{BigFloat},10)) isa FastTransforms.DummyPlan
-        @test plan_dct(rand(Complex{BigFloat},10), 1:1) isa FastTransforms.DummyPlan
-        @test plan_dct!(rand(Complex{BigFloat},10)) isa FastTransforms.DummyPlan
-        @test plan_dct!(rand(Complex{BigFloat},10), 1:1) isa FastTransforms.DummyPlan
-        @test !( plan_dct(rand(T,10)) isa FastTransforms.DummyPlan )
-        @test !( plan_dct(rand(T,10), 1:1) isa FastTransforms.DummyPlan )
-        @test !( plan_dct(rand(Complex{T},10)) isa FastTransforms.DummyPlan )
-        @test !( plan_dct(rand(Complex{T},10), 1:1) isa FastTransforms.DummyPlan )
-        @test !( plan_dct!(rand(Complex{T},10)) isa FastTransforms.DummyPlan )
-        @test !( plan_dct!(rand(Complex{T},10), 1:1) isa FastTransforms.DummyPlan )
-
-        @test plan_idct(rand(BigFloat,10)) isa FastTransforms.DummyPlan
-        @test plan_idct(rand(BigFloat,10), 1:1) isa FastTransforms.DummyPlan
-        @test plan_idct(rand(Complex{BigFloat},10)) isa FastTransforms.DummyPlan
-        @test plan_idct(rand(Complex{BigFloat},10), 1:1) isa FastTransforms.DummyPlan
-        @test plan_idct!(rand(Complex{BigFloat},10)) isa FastTransforms.DummyPlan
-        @test plan_idct!(rand(Complex{BigFloat},10), 1:1) isa FastTransforms.DummyPlan
-        @test !( plan_idct(rand(T,10)) isa FastTransforms.DummyPlan )
-        @test !( plan_idct(rand(T,10), 1:1) isa FastTransforms.DummyPlan )
-        @test !( plan_idct(rand(Complex{T},10)) isa FastTransforms.DummyPlan )
-        @test !( plan_idct(rand(Complex{T},10), 1:1) isa FastTransforms.DummyPlan )
-        @test !( plan_idct!(rand(Complex{T},10)) isa FastTransforms.DummyPlan )
-        @test !( plan_idct!(rand(Complex{T},10), 1:1) isa FastTransforms.DummyPlan )
-
-        @test plan_rfft(rand(BigFloat,10)) isa FastTransforms.DummyPlan
-        @test plan_rfft(rand(BigFloat,10), 1:1) isa FastTransforms.DummyPlan
-        @test plan_brfft(rand(Complex{BigFloat},10), 19) isa FastTransforms.DummyPlan
-        @test plan_brfft(rand(Complex{BigFloat},10), 19, 1:1) isa FastTransforms.DummyPlan
-        @test !( plan_rfft(rand(T,10)) isa FastTransforms.DummyPlan )
-        @test !( plan_rfft(rand(T,10), 1:1) isa FastTransforms.DummyPlan )
-        @test !( plan_brfft(rand(Complex{T},10), 19) isa FastTransforms.DummyPlan )
-        @test !( plan_brfft(rand(Complex{T},10), 19, 1:1) isa FastTransforms.DummyPlan )
-
-    end
-
 end

From 9172225dcb8e55897613b7a910cc7beef777156f Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 3 Aug 2022 15:10:03 -0600
Subject: [PATCH 110/222] update docs

---
 README.md       | 2 +-
 docs/src/dev.md | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 00302bba..b2ec3187 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ julia> using FastTransforms, LinearAlgebra
 
 ## Fast orthogonal polynomial transforms
 
-The 34 orthogonal polynomial transforms are listed in `FastTransforms.kind2string.(0:33)`. Univariate transforms may be planned with the standard normalization or with orthonormalization. For multivariate transforms, the standard normalization may be too severe for floating-point computations, so it is omitted. Here are two examples:
+The orthogonal polynomial transforms are enumerated by `FastTransforms.Transforms`; a description of each one is given by `FastTransforms.kind2string.(instances(FastTransforms.Transforms))`. Univariate transforms may be planned with the standard normalization or with orthonormalization. For multivariate transforms, the standard normalization may be too severe for floating-point computations, so it is omitted. Here are two examples:
 
 ### The Chebyshev--Legendre transform
 
diff --git a/docs/src/dev.md b/docs/src/dev.md
index 75c35290..2335abd7 100644
--- a/docs/src/dev.md
+++ b/docs/src/dev.md
@@ -10,10 +10,7 @@ The C library generates assembly for vectorized operations such as single instru
 
 C libraries are easier to call from any other language, partly explaining why the Python package manager Spack [already supports the C library](https://spack.readthedocs.io/en/latest/package_list.html#fasttransforms) through third-party efforts.
 
-In Julia, a parametric composite type with unrestricted type parameters is just about as big as `Any`. Such a type allows the Julia API to far exceed the C API in its ability to unify all of the orthogonal polynomial transforms and present them as linear operators. The `mutable struct FTPlan{T, N, K}`, together with `AdjointFTPlan` and `TransposeFTPlan`, are the core Julia types in this repository. Whereas `T` is understood to represent element type of the plan and `N` represents the number of leading dimensions of the array on which it operates, `K` is a mere integer which serves to distinguish the orthogonal polynomials at play. For example, `FTPlan{Float64, 1, LEG2CHEB}` represents the necessary pre-computations to convert 64-bit Legendre series to Chebyshev series (of the first kind). `N == 1` because Chebyshev and Legendre series are naturally represented with vectors of coefficients. However, this particular plan may operate not only on vectors but also on matrices, column-by-column.
-
-!!! note
-    When working with specialized `FTPlan`s, it is prudent to use the named constants for `K`, such as `FastTransforms.LEG2CHEB`, rather than their literal integer values as these may change when future plans become operational.
+In Julia, a parametric composite type with unrestricted type parameters is just about as big as `Any`. Such a type allows the Julia API to far exceed the C API in its ability to unify all of the orthogonal polynomial transforms and present them as linear operators. The `mutable struct FTPlan{T, N, K}`, together with `AdjointFTPlan` and `TransposeFTPlan`, are the core Julia types in this repository. Whereas `T` is understood to represent the element type of the plan and `N` represents the number of leading dimensions of the array on which it operates, `K` is a mere enumeration which serves to distinguish the orthogonal polynomials at play. For example, `FTPlan{Float64, 1, LEG2CHEB}` represents the necessary pre-computations to convert 64-bit Legendre series to Chebyshev series (of the first kind). `N == 1` because Chebyshev and Legendre series are naturally represented with vectors of coefficients. However, this particular plan may operate not only on vectors but also on matrices, column-by-column.
 
 ## The developer's right to build from source
 

From 98e825107e3dc2b81bccc7089355500284ed4101 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Wed, 3 Aug 2022 15:12:46 -0600
Subject: [PATCH 111/222] add code cov token

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b2ec3187..0d45156f 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # FastTransforms.jl
 
-[![Build Status](https://github.com/JuliaApproximation/FastTransforms.jl/workflows/CI/badge.svg)](https://github.com/JuliaApproximation/FastTransforms.jl/actions?query=workflow%3ACI) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/dev)
+[![Build Status](https://github.com/JuliaApproximation/FastTransforms.jl/workflows/CI/badge.svg)](https://github.com/JuliaApproximation/FastTransforms.jl/actions?query=workflow%3ACI) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg?token=BxTvSNgmLL)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/dev)
 
 `FastTransforms.jl` allows the user to conveniently work with orthogonal polynomials with degrees well into the millions.
 

From f918bf6e37b717242f60bf260c48df419317fe80 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub@users.noreply.github.com>
Date: Thu, 4 Aug 2022 22:30:37 +0530
Subject: [PATCH 112/222] Link to FastSphericalHarmonics.jl in Readme (#184)

This package provides a simpler interface to the spherical harmonic transforms defined in FastTransforms.jl
---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 0d45156f..f2d74a2e 100644
--- a/README.md
+++ b/README.md
@@ -95,6 +95,8 @@ julia> norm(F-H)/norm(F)
 
 Due to the structure of the spherical harmonic connection problem, these transforms may also be performed in-place with `lmul!` and `ldiv!`.
 
+See also [FastSphericalHarmonics.jl](https://github.com/eschnett/FastSphericalHarmonics.jl) for a simpler interface to the spherical harmonic transforms defined in this package.
+
 ## Nonuniform fast Fourier transforms
 
 The NUFFTs are implemented thanks to [Alex Townsend](https://github.com/ajt60gaibb):

From 6e8fbb643b3a4d9a1192cf0a37a1c489d5e12f8c Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Thu, 4 Aug 2022 11:42:16 -0600
Subject: [PATCH 113/222] 1D plans can have non-unit stride in dims > 1

---
 Project.toml             |  2 +-
 src/libfasttransforms.jl | 92 ++++++++++++++++++++--------------------
 2 files changed, 46 insertions(+), 48 deletions(-)

diff --git a/Project.toml b/Project.toml
index 20f2dbbb..9978d0d0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.2"
+version = "0.14.3"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index b97f9991..7f71e0a1 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -230,11 +230,9 @@ function checksize(p::FTPlan{T, 1}, x::StridedArray{T}) where T
     end
 end
 
-function checkstrides(p::FTPlan{T, 1}, x::StridedArray{T}) where T
-    sz = size(x)
-    st = strides(x)
-    if (1, cumprod(sz)...) != (st..., length(x))
-        error("FTPlan requires unit strides, x has strides $(strides(x))")
+function checkstride(p::FTPlan{T, 1}, x::StridedArray{T}) where T
+    if stride(x, 1) != 1
+        error("FTPlan requires unit stride in the leading dimension, x has stride $(stride(x, 1)) in the leading dimension.")
     end
 end
 
@@ -336,11 +334,11 @@ function checksize(p::AdjointFTPlan, x)
     end
 end
 
-function checkstrides(p::AdjointFTPlan, x)
+function checkstride(p::AdjointFTPlan, x)
     try
-        checkstrides(p.adjoint, x)
+        checkstride(p.adjoint, x)
     catch
-        checkstrides(p.parent, x)
+        checkstride(p.parent, x)
     end
 end
 
@@ -392,11 +390,11 @@ function checksize(p::TransposeFTPlan, x)
     end
 end
 
-function checkstrides(p::TransposeFTPlan, x)
+function checkstride(p::TransposeFTPlan, x)
     try
-        checkstrides(p.transpose, x)
+        checkstride(p.transpose, x)
     catch
-        checkstrides(p.parent, x)
+        checkstride(p.parent, x)
     end
 end
 
@@ -832,19 +830,19 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bfmvf, :Float32),
     @eval begin
         function $fJ(p::FTPlan{$elty, 1}, x::StridedVector{$elty})
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'N', p, x)
             return x
         end
         function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, K}}, x::StridedVector{$elty}) where K
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
             return x
         end
         function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, K}}, x::StridedVector{$elty}) where K
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
             return x
         end
@@ -856,19 +854,19 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bbbfmvf, :Float32),
     @eval begin
         function $fJ(p::FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}, x::StridedVector{$elty})
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'N', '2', '1', p, x)
             return x
         end
         function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::StridedVector{$elty})
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', '1', '2', p, x)
             return x
         end
         function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::StridedVector{$elty})
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', '1', '2', p, x)
             return x
         end
@@ -880,19 +878,19 @@ for (fJ, fC, elty) in ((:lmul!, :ft_mpmv, :Float64),
     @eval begin
         function $fJ(p::ModifiedFTPlan{$elty}, x::StridedVector{$elty})
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'N', p, x)
             return x
         end
         function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan{$elty}}, x::StridedVector{$elty})
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
             return x
         end
         function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan{$elty}}, x::StridedVector{$elty})
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}), 'T', p, x)
             return x
         end
@@ -904,19 +902,19 @@ for (fJ, fC) in ((:lmul!, :ft_mpfr_trmv_ptr),
     @eval begin
         function $fJ(p::FTPlan{BigFloat, 1}, x::StridedVector{BigFloat})
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Int32), 'N', p.n, p, p.n, renew!(x), Base.MPFR.ROUNDING_MODE[])
             return x
         end
         function $fJ(p::AdjointFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::StridedVector{BigFloat}) where K
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), Base.MPFR.ROUNDING_MODE[])
             return x
         end
         function $fJ(p::TransposeFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::StridedVector{BigFloat}) where K
             checksize(p, x)
-            checkstrides(p, x)
+            checkstride(p, x)
             ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), Base.MPFR.ROUNDING_MODE[])
             return x
         end
@@ -930,20 +928,20 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bfmmf, :Float32),
     @eval begin
         function $fJ(p::FTPlan{$elty, 1}, x::StridedMatrix{$elty})
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', p, x, stride(x, 2), size(x, 2))
             return x
         end
         function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, K}}, x::StridedMatrix{$elty}) where K
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, stride(x, 2), size(x, 2))
             return x
         end
         function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, K}}, x::StridedMatrix{$elty}) where K
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, stride(x, 2), size(x, 2))
             return x
         end
     end
@@ -954,20 +952,20 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bbbfmmf, :Float32),
     @eval begin
         function $fJ(p::FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}, x::StridedMatrix{$elty})
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', '2', '1', p, x, size(x, 1), size(x, 2))
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', '2', '1', p, x, stride(x, 2), size(x, 2))
             return x
         end
         function $fJ(p::AdjointFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::StridedMatrix{$elty})
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', '1', '2', p, x, size(x, 1), size(x, 2))
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', '1', '2', p, x, stride(x, 2), size(x, 2))
             return x
         end
         function $fJ(p::TransposeFTPlan{$elty, FTPlan{$elty, 1, ASSOCIATEDJAC2JAC}}, x::StridedMatrix{$elty})
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', '1', '2', p, x, size(x, 1), size(x, 2))
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', '1', '2', p, x, stride(x, 2), size(x, 2))
             return x
         end
     end
@@ -978,20 +976,20 @@ for (fJ, fC, elty) in ((:lmul!, :ft_mpmm, :Float64),
     @eval begin
         function $fJ(p::ModifiedFTPlan{$elty}, x::StridedMatrix{$elty})
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'N', p, x, stride(x, 2), size(x, 2))
             return x
         end
         function $fJ(p::AdjointFTPlan{$elty, ModifiedFTPlan{$elty}}, x::StridedMatrix{$elty})
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, stride(x, 2), size(x, 2))
             return x
         end
         function $fJ(p::TransposeFTPlan{$elty, ModifiedFTPlan{$elty}}, x::StridedMatrix{$elty})
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{$elty}, Cint, Cint), 'T', p, x, stride(x, 2), size(x, 2))
             return x
         end
     end
@@ -1002,20 +1000,20 @@ for (fJ, fC) in ((:lmul!, :ft_mpfr_trmm_ptr),
     @eval begin
         function $fJ(p::FTPlan{BigFloat, 1}, x::StridedMatrix{BigFloat})
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'N', p.n, p, p.n, renew!(x), size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'N', p.n, p, p.n, renew!(x), stride(x, 2), size(x, 2), Base.MPFR.ROUNDING_MODE[])
             return x
         end
         function $fJ(p::AdjointFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::StridedMatrix{BigFloat}) where K
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), stride(x, 2), size(x, 2), Base.MPFR.ROUNDING_MODE[])
             return x
         end
         function $fJ(p::TransposeFTPlan{BigFloat, FTPlan{BigFloat, 1, K}}, x::StridedMatrix{BigFloat}) where K
             checksize(p, x)
-            checkstrides(p, x)
-            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), size(x, 1), size(x, 2), Base.MPFR.ROUNDING_MODE[])
+            checkstride(p, x)
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Cint, Cint, Ptr{mpfr_t}, Cint, Ptr{BigFloat}, Cint, Cint, Int32), 'T', p.parent.n, p, p.parent.n, renew!(x), stride(x, 2), size(x, 2), Base.MPFR.ROUNDING_MODE[])
             return x
         end
     end

From 51688b727fc3f26df00573ea2e48d09b6b2e329d Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Fri, 12 Aug 2022 14:14:22 +0100
Subject: [PATCH 114/222] Support 3D tensors (#185)

* Start supporting 3D tensors

* Tensor Chebyshev 2

* ichebyshevtransform
---
 Project.toml              |   2 +-
 src/chebyshevtransform.jl | 200 ++++++++++++++++++++++++--------------
 test/chebyshevtests.jl    |  50 ++++++++++
 3 files changed, 176 insertions(+), 76 deletions(-)

diff --git a/Project.toml b/Project.toml
index 9978d0d0..c5112e90 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.3"
+version = "0.14.4"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 9889f728..0cbcb84a 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -54,23 +54,100 @@ plan_chebyshevtransform(x::AbstractArray, dims...; kws...) = plan_chebyshevtrans
 @inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::StridedArray{T}) where T = mul!(y, P, x)
 @inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::AbstractArray) where T = mul!(y, P, convert(Array{T}, x))
 
-@inline _cheb1_rescale!(_, y::AbstractVector) = (y[1] /= 2; ldiv!(length(y), y))
 
-@inline function _cheb1_rescale!(d::Number, y::AbstractMatrix{T}) where T
+
+ldiv_dim_begin!(α, d::Number, y::AbstractVector) = y[1] /= α
+function ldiv_dim_begin!(α, d::Number, y::AbstractMatrix)
+    if isone(d)
+        ldiv!(α, @view(y[1,:]))
+    else
+        ldiv!(α, @view(y[:,1]))
+    end
+end
+function ldiv_dim_begin!(α, d::Number, y::AbstractArray{<:Any,3})
+    if isone(d)
+        ldiv!(α, @view(y[1,:,:]))
+    elseif d == 2
+        ldiv!(α, @view(y[:,1,:]))
+    else # d == 3
+        ldiv!(α, @view(y[:,:,1]))
+    end
+end
+
+ldiv_dim_end!(α, d::Number, y::AbstractVector) = y[end] /= α
+function ldiv_dim_end!(α, d::Number, y::AbstractMatrix)
+    if isone(d)
+        ldiv!(α, @view(y[end,:]))
+    else
+        ldiv!(α, @view(y[:,end]))
+    end
+end
+function ldiv_dim_end!(α, d::Number, y::AbstractArray{<:Any,3})
+    if isone(d)
+        ldiv!(α, @view(y[end,:,:]))
+    elseif d == 2
+        ldiv!(α, @view(y[:,end,:]))
+    else # d == 3
+        ldiv!(α, @view(y[:,:,end]))
+    end
+end
+
+lmul_dim_begin!(α, d::Number, y::AbstractVector) = y[1] *= α
+function lmul_dim_begin!(α, d::Number, y::AbstractMatrix)
     if isone(d)
-        ldiv!(2, view(y,1,:))
+        lmul!(α, @view(y[1,:]))
     else
-        ldiv!(2, view(y,:,1))
+        lmul!(α, @view(y[:,1]))
+    end
+end
+function lmul_dim_begin!(α, d::Number, y::AbstractArray{<:Any,3})
+    if isone(d)
+        lmul!(α, @view(y[1,:,:]))
+    elseif d == 2
+        lmul!(α, @view(y[:,1,:]))
+    else # d == 3
+        lmul!(α, @view(y[:,:,1]))
     end
+end
+
+lmul_dim_end!(α, d::Number, y::AbstractVector) = y[end] *= α
+function lmul_dim_end!(α, d::Number, y::AbstractMatrix)
+    if isone(d)
+        lmul!(α, @view(y[end,:]))
+    else
+        lmul!(α, @view(y[:,end]))
+    end
+end
+function lmul_dim_end!(α, d::Number, y::AbstractArray{<:Any,3})
+    if isone(d)
+        lmul!(α, @view(y[end,:,:]))
+    elseif d == 2
+        lmul!(α, @view(y[:,end,:]))
+    else # d == 3
+        lmul!(α, @view(y[:,:,end]))
+    end
+end
+
+
+@inline function _cheb1_rescale!(d::Number, y::AbstractArray)
+    ldiv_dim_begin!(2, d, y)
     ldiv!(size(y,d), y)
 end
 
-# TODO: higher dimensional arrays
-@inline function _cheb1_rescale!(d::UnitRange, y::AbstractMatrix{T}) where T
-    @assert d == 1:2
-    ldiv!(2, view(y,1,:))
-    ldiv!(2, view(y,:,1))
-    ldiv!(prod(size(y)), y)
+function _prod_size(sz, d)
+    ret = 1
+    for k in d
+        ret *= sz[k]
+    end
+    ret
+end
+
+
+@inline function _cheb1_rescale!(d::UnitRange, y::AbstractArray)
+    for k in d
+        ldiv_dim_begin!(2, k, y)
+    end
+    ldiv!(_prod_size(size(y), d), y)
 end
 
 function *(P::ChebyshevTransformPlan{T,1,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
@@ -90,27 +167,21 @@ function mul!(y::AbstractArray{T,N}, P::ChebyshevTransformPlan{T,1,K,false,N}, x
 end
 
 
-_cheb2_rescale!(_, y::AbstractVector) = (y[1] /= 2; y[end] /= 2; ldiv!(length(y)-1, y))
 
-function _cheb2_rescale!(d::Number, y::AbstractMatrix{T}) where T
-    if isone(d)
-        ldiv!(2, @view(y[1,:]))
-        ldiv!(2, @view(y[end,:]))
-    else
-        ldiv!(2, @view(y[:,1]))
-        ldiv!(2, @view(y[:,end]))
-    end
+function _cheb2_rescale!(d::Number, y::AbstractArray)
+    ldiv_dim_begin!(2, d, y)
+    ldiv_dim_end!(2, d, y)
     ldiv!(size(y,d)-1, y)
 end
 
 # TODO: higher dimensional arrays
-function _cheb2_rescale!(d::UnitRange, y::AbstractMatrix{T}) where T
-    @assert d == 1:2
-    ldiv!(2, @view(y[1,:]))
-    ldiv!(2, @view(y[end,:]))
-    ldiv!(2, @view(y[:,1]))
-    ldiv!(2, @view(y[:,end]))
-    ldiv!(prod(size(y) .- 1), y)
+function _cheb2_rescale!(d::UnitRange, y::AbstractArray)
+    for k in d
+        ldiv_dim_begin!(2, k, y)
+        ldiv_dim_end!(2, k, y)
+    end
+
+    ldiv!(_prod_size(size(y) .- 1, d), y)
 end
 
 function *(P::ChebyshevTransformPlan{T,2,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
@@ -200,33 +271,25 @@ end
 plan_ichebyshevtransform!(x::AbstractArray, dims...; kws...) = plan_ichebyshevtransform!(x, Val(1), dims...; kws...)
 plan_ichebyshevtransform(x::AbstractArray, dims...; kws...) = plan_ichebyshevtransform(x, Val(1), dims...; kws...)
 
-@inline _icheb1_prescale!(_, x::AbstractVector) = (x[1] *= 2)
-@inline function _icheb1_prescale!(d::Number, x::AbstractMatrix)
-    if isone(d)
-        lmul!(2, view(x,1,:))
-    else
-        lmul!(2, view(x,:,1))
-    end
+@inline function _icheb1_prescale!(d::Number, x::AbstractArray)
+    lmul_dim_begin!(2, d, x)
     x
 end
-@inline function _icheb1_prescale!(d::UnitRange, x::AbstractMatrix)
-    lmul!(2, view(x,:,1))
-    lmul!(2, view(x,1,:))
+@inline function _icheb1_prescale!(d::UnitRange, x::AbstractArray)
+    for k in d
+        _icheb1_prescale!(k, x)
+    end
     x
 end
-@inline _icheb1_postscale!(_, x::AbstractVector) = (x[1] /= 2)
-@inline function _icheb1_postscale!(d::Number, x::AbstractMatrix)
-    if isone(d)
-        ldiv!(2, view(x,1,:))
-    else
-        ldiv!(2, view(x,:,1))
-    end
+@inline function _icheb1_postscale!(d::Number, x::AbstractArray)
+    ldiv_dim_begin!(2, d, x)
     x
 end
 
-@inline function _icheb1_postscale!(d::UnitRange, x::AbstractMatrix)
-    ldiv!(2, view(x,1,:))
-    ldiv!(2, view(x,:,1))
+@inline function _icheb1_postscale!(d::UnitRange, x::AbstractArray)
+    for k in d
+        _icheb1_postscale!(k, x)
+    end
     x
 end
 
@@ -249,40 +312,27 @@ function mul!(y::AbstractArray{T,N}, P::IChebyshevTransformPlan{T,1,K,false,N},
     ldiv!(2^length(P.plan.region), y)
 end
 
-@inline _icheb2_prescale!(_, x::AbstractVector) = (x[1] *= 2; x[end] *= 2)
-@inline function _icheb2_prescale!(d::Number, x::AbstractMatrix)
-    if isone(d)
-        lmul!(2, @view(x[1,:]))
-        lmul!(2, @view(x[end,:]))
-    else
-        lmul!(2, @view(x[:,1]))
-        lmul!(2, @view(x[:,end]))
-    end
+@inline function _icheb2_prescale!(d::Number, x::AbstractArray)
+    lmul_dim_begin!(2, d, x)
+    lmul_dim_end!(2, d, x)
     x
 end
-@inline function _icheb2_prescale!(d::UnitRange, x::AbstractMatrix)
-    lmul!(2, @view(x[1,:]))
-    lmul!(2, @view(x[end,:]))
-    lmul!(2, @view(x[:,1]))
-    lmul!(2, @view(x[:,end]))
+@inline function _icheb2_prescale!(d::UnitRange, x::AbstractArray)
+    for k in d
+        _icheb2_prescale!(k, x)
+    end
     x
 end
-@inline _icheb2_postrescale!(_, x::AbstractVector) = (x[1] /= 2; x[end] /= 2)
-@inline function _icheb2_postrescale!(d::Number, x::AbstractMatrix)
-    if isone(d)
-        ldiv!(2, @view(x[1,:]))
-        ldiv!(2, @view(x[end,:]))
-    else
-        ldiv!(2, @view(x[:,1]))
-        ldiv!(2, @view(x[:,end]))
-    end
+
+@inline function _icheb2_postrescale!(d::Number, x::AbstractArray)
+    ldiv_dim_begin!(2, d, x)
+    ldiv_dim_end!(2, d, x)
     x
 end
-@inline function _icheb2_postrescale!(d::UnitRange, x::AbstractMatrix)
-    ldiv!(2, @view(x[1,:]))
-    ldiv!(2, @view(x[end,:]))
-    ldiv!(2, @view(x[:,1]))
-    ldiv!(2, @view(x[:,end]))
+@inline function _icheb2_postrescale!(d::UnitRange, x::AbstractArray)
+    for k in d
+        _icheb2_postrescale!(k, x)
+    end
     x
 end
 @inline function _icheb2_rescale!(d::Number, y::AbstractArray{T}) where T
@@ -292,7 +342,7 @@ end
 end
 @inline function _icheb2_rescale!(d::UnitRange, y::AbstractArray{T}) where T
     _icheb2_prescale!(d, y)
-    lmul!(prod(convert.(T, size(y) .- 1)./2), y)
+    lmul!(_prod_size(convert.(T, size(y) .- 1)./2, d), y)
     y
 end
 
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 76ba9ec5..d98d07b4 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -203,6 +203,56 @@ using FastTransforms, Test
         @test_throws ArgumentError ichebyshevtransform!(copy(X), Val(2))
     end
 
+    @testset "tensor" begin
+        X = randn(4,5,6)
+        X̃ = similar(X)
+        @testset "chebyshevtransform" begin
+            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevtransform(X[:,k,j]) end
+            @test @inferred(chebyshevtransform(X,1)) ≈ @inferred(chebyshevtransform!(copy(X),1)) ≈ X̃
+            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevtransform(X[k,:,j]) end
+            @test chebyshevtransform(X,2) ≈ chebyshevtransform!(copy(X),2) ≈ X̃
+            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevtransform(X[k,j,:]) end
+            @test chebyshevtransform(X,3) ≈ chebyshevtransform!(copy(X),3) ≈ X̃
+
+            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevtransform(X[:,k,j],Val(2)) end
+            @test @inferred(chebyshevtransform(X,Val(2),1)) ≈ @inferred(chebyshevtransform!(copy(X),Val(2),1)) ≈ X̃
+            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevtransform(X[k,:,j],Val(2)) end
+            @test chebyshevtransform(X,Val(2),2) ≈ chebyshevtransform!(copy(X),Val(2),2) ≈ X̃
+            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevtransform(X[k,j,:],Val(2)) end
+            @test chebyshevtransform(X,Val(2),3) ≈ chebyshevtransform!(copy(X),Val(2),3) ≈ X̃
+
+            @test @inferred(chebyshevtransform(X)) ≈ @inferred(chebyshevtransform!(copy(X))) ≈ chebyshevtransform(chebyshevtransform(chebyshevtransform(X,1),2),3)
+            @test @inferred(chebyshevtransform(X,Val(2))) ≈ @inferred(chebyshevtransform!(copy(X),Val(2))) ≈ chebyshevtransform(chebyshevtransform(chebyshevtransform(X,Val(2),1),Val(2),2),Val(2),3)
+        end
+
+        @testset "ichebyshevtransform" begin
+            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevtransform(X[:,k,j]) end
+            @test @inferred(ichebyshevtransform(X,1)) ≈ @inferred(ichebyshevtransform!(copy(X),1)) ≈ X̃
+            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevtransform(X[k,:,j]) end
+            @test ichebyshevtransform(X,2) ≈ ichebyshevtransform!(copy(X),2) ≈ X̃
+            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevtransform(X[k,j,:]) end
+            @test ichebyshevtransform(X,3) ≈ ichebyshevtransform!(copy(X),3) ≈ X̃
+
+            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevtransform(X[:,k,j],Val(2)) end
+            @test @inferred(ichebyshevtransform(X,Val(2),1)) ≈ @inferred(ichebyshevtransform!(copy(X),Val(2),1)) ≈ X̃
+            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevtransform(X[k,:,j],Val(2)) end
+            @test ichebyshevtransform(X,Val(2),2) ≈ ichebyshevtransform!(copy(X),Val(2),2) ≈ X̃
+            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevtransform(X[k,j,:],Val(2)) end
+            @test ichebyshevtransform(X,Val(2),3) ≈ ichebyshevtransform!(copy(X),Val(2),3) ≈ X̃
+
+            @test @inferred(ichebyshevtransform(X)) ≈ @inferred(ichebyshevtransform!(copy(X))) ≈ ichebyshevtransform(ichebyshevtransform(ichebyshevtransform(X,1),2),3)
+            @test @inferred(ichebyshevtransform(X,Val(2))) ≈ @inferred(ichebyshevtransform!(copy(X),Val(2))) ≈ ichebyshevtransform(ichebyshevtransform(ichebyshevtransform(X,Val(2),1),Val(2),2),Val(2),3)
+
+            @test ichebyshevtransform(chebyshevtransform(X)) ≈ X
+            @test chebyshevtransform(ichebyshevtransform(X)) ≈ X
+        end
+
+        X = randn(1,1,1)
+        @test chebyshevtransform!(copy(X), Val(1)) == ichebyshevtransform!(copy(X), Val(1)) == X
+        @test_throws ArgumentError chebyshevtransform!(copy(X), Val(2))
+        @test_throws ArgumentError ichebyshevtransform!(copy(X), Val(2))
+    end
+
     @testset "Integer" begin
         @test chebyshevtransform([1,2,3]) == chebyshevtransform([1.,2,3])
         @test chebyshevtransform([1,2,3], Val(2)) == chebyshevtransform([1.,2,3], Val(2))

From 54c882e2f81caeebd1012269e4327c77e7901bf2 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Tue, 23 Aug 2022 13:23:33 -0500
Subject: [PATCH 115/222] remove DSP (but ensure that [0.7.7, 0.8.0) is loaded
 by tagging a new patch of ToeplitzMatrices)

https://github.com/JuliaMatrices/ToeplitzMatrices.jl/commit/1e4f92c15d9fd83c204cffbd19886c3f6addaf32
---
 Project.toml          | 6 ++----
 src/FastTransforms.jl | 6 ------
 src/fftBigFloat.jl    | 4 ----
 3 files changed, 2 insertions(+), 14 deletions(-)
 delete mode 100644 src/fftBigFloat.jl

diff --git a/Project.toml b/Project.toml
index c5112e90..30ff3ebc 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,10 +1,9 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.4"
+version = "0.14.5"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
-DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
 FastTransforms_jll = "34b6f7d7-08f9-5794-9e10-3819e4c7e49a"
@@ -19,7 +18,6 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "1.0"
-DSP = "0.6, 0.7"
 FFTW = "1"
 FastGaussQuadrature = "0.4"
 FastTransforms_jll = "0.6.0"
@@ -27,5 +25,5 @@ FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
 GenericFFT = "0.1"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
-ToeplitzMatrices = "0.6, 0.7"
+ToeplitzMatrices = "0.7.2"
 julia = "1.7"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 9c1cddc6..06ffbc0d 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -3,8 +3,6 @@ module FastTransforms
 using FastGaussQuadrature, FillArrays, LinearAlgebra,
       Reexport, SpecialFunctions, ToeplitzMatrices
 
-import DSP
-
 @reexport using AbstractFFTs
 @reexport using FFTW
 @reexport using GenericFFT
@@ -21,8 +19,6 @@ import AbstractFFTs: Plan, ScaledPlan,
                      fftshift, ifftshift, rfft_output_size, brfft_output_size,
                      plan_inv, normalization
 
-import DSP: conv
-
 import FFTW: dct, dct!, idct, idct!, plan_dct!, plan_idct!,
              plan_dct, plan_idct, fftwNumber
 
@@ -95,8 +91,6 @@ export weightedhermitetransform, iweightedhermitetransform
 
 include("hermite.jl")
 
-include("fftBigFloat.jl")
-
 export gaunt
 
 include("gaunt.jl")
diff --git a/src/fftBigFloat.jl b/src/fftBigFloat.jl
deleted file mode 100644
index e67f1143..00000000
--- a/src/fftBigFloat.jl
+++ /dev/null
@@ -1,4 +0,0 @@
-conv(u::AbstractArray{T, N}, v::AbstractArray{T, N}) where {T<:AbstractFloat, N} = GenericFFT._conv!(deepcopy(u), deepcopy(v))
-conv(u::AbstractArray{T, N}, v::AbstractArray{Complex{T}, N}) where {T<:AbstractFloat, N} = GenericFFT._conv!(complex(deepcopy(u)), deepcopy(v))
-conv(u::AbstractArray{Complex{T}, N}, v::AbstractArray{T, N}) where {T<:AbstractFloat, N} = GenericFFT._conv!(deepcopy(u), complex(deepcopy(v)))
-conv(u::AbstractArray{Complex{T}, N}, v::AbstractArray{Complex{T}, N}) where {T<:AbstractFloat, N} = GenericFFT._conv!(deepcopy(u), deepcopy(v))

From 621c3c316d5d72d2e73c7dfd313b58d6112f23e3 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Tue, 23 Aug 2022 13:34:02 -0500
Subject: [PATCH 116/222] downgrade ToeplitzMatrices.jl patch

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 30ff3ebc..c0b5494a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -25,5 +25,5 @@ FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
 GenericFFT = "0.1"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
-ToeplitzMatrices = "0.7.2"
+ToeplitzMatrices = "0.7.1"
 julia = "1.7"

From 8be10bf3662309f6704f9823fc243e7e65e8ac33 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Tue, 23 Aug 2022 13:54:50 -0500
Subject: [PATCH 117/222] remove old tests

---
 test/fftBigFloattests.jl | 9 ---------
 test/runtests.jl         | 1 -
 2 files changed, 10 deletions(-)
 delete mode 100644 test/fftBigFloattests.jl

diff --git a/test/fftBigFloattests.jl b/test/fftBigFloattests.jl
deleted file mode 100644
index 04040571..00000000
--- a/test/fftBigFloattests.jl
+++ /dev/null
@@ -1,9 +0,0 @@
-using DSP, FastTransforms, Test
-
-@testset "BigFloat Convolution" begin
-    s = big(1) ./ (1:10)
-    s64 = Float64.(s)
-    @test Float64.(conv(s, s)) ≈ conv(s64, s64)
-    @test s == big(1) ./ (1:10) #67, ensure conv doesn't overwrite input
-    @test all(s64 .=== Float64.(big(1) ./ (1:10)))
-end
diff --git a/test/runtests.jl b/test/runtests.jl
index 33c88fd1..8a3790a7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,7 +5,6 @@ include("chebyshevtests.jl")
 include("quadraturetests.jl")
 include("libfasttransformstests.jl")
 include("nuffttests.jl")
-include("fftBigFloattests.jl")
 include("paduatests.jl")
 include("gaunttests.jl")
 include("hermitetests.jl")

From d675e6a0c646e4a556ff9c6c67f81d746db79724 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Thu, 13 Oct 2022 20:48:32 +0100
Subject: [PATCH 118/222] import interlace

---
 Project.toml          | 2 +-
 src/FastTransforms.jl | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index c0b5494a..1def97ec 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.5"
+version = "0.14.6"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 06ffbc0d..20740761 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -28,6 +28,8 @@ import FillArrays: AbstractFill, getindex_value
 
 import LinearAlgebra: mul!, lmul!, ldiv!
 
+import GenericFFT: interlace # imported in downstream packages
+
 export leg2cheb, cheb2leg, ultra2ultra, jac2jac,
        lag2lag, jac2ultra, ultra2jac, jac2cheb,
        cheb2jac, ultra2cheb, cheb2ultra, associatedjac2jac,

From 3cc297cb32ce324676fb12712c4fd58b169eba7d Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Wed, 2 Nov 2022 09:49:47 +0000
Subject: [PATCH 119/222] Update CompatHelper.yml

---
 .github/workflows/CompatHelper.yml | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml
index 7784f241..09181610 100644
--- a/.github/workflows/CompatHelper.yml
+++ b/.github/workflows/CompatHelper.yml
@@ -3,16 +3,35 @@ on:
   schedule:
     - cron: 0 0 * * *
   workflow_dispatch:
+permissions:
+  contents: write
+  pull-requests: write
 jobs:
   CompatHelper:
     runs-on: ubuntu-latest
     steps:
+      - name: Check if Julia is already available in the PATH
+        id: julia_in_path
+        run: which julia
+        continue-on-error: true
+      - name: Install Julia, but only if it is not already available in the PATH
+        uses: julia-actions/setup-julia@v1
+        with:
+          version: '1'
+          arch: ${{ runner.arch }}
+        if: steps.julia_in_path.outcome != 'success'
+      - name: "Add the General registry via Git"
+        run: |
+          import Pkg
+          ENV["JULIA_PKG_SERVER"] = ""
+          Pkg.Registry.add("General")
+        shell: julia --color=yes {0}
       - name: "Install CompatHelper"
         run: |
           import Pkg
           name = "CompatHelper"
           uuid = "aa819f21-2bde-4658-8897-bab36330d9b7"
-          version = "2"
+          version = "3"
           Pkg.add(; name, uuid, version)
         shell: julia --color=yes {0}
       - name: "Run CompatHelper"

From 59774abd59061e26ee18e267d82963f8a87fd32a Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 2 Nov 2022 10:30:02 +0000
Subject: [PATCH 120/222] CompatHelper: bump compat for FastGaussQuadrature to
 0.5, (keep existing compat) (#188)

* CompatHelper: bump compat for FastGaussQuadrature to 0.5, (keep existing compat)

* v0.14.7

Co-authored-by: CompatHelper Julia <compathelper_noreply@julialang.org>
Co-authored-by: Sheehan Olver <solver@mac.com>
---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 1def97ec..cbdeb56e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.6"
+version = "0.14.7"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -19,7 +19,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 [compat]
 AbstractFFTs = "1.0"
 FFTW = "1"
-FastGaussQuadrature = "0.4"
+FastGaussQuadrature = "0.4, 0.5"
 FastTransforms_jll = "0.6.0"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
 GenericFFT = "0.1"

From 1e949d98efdbe4e3d31a50fb5a07d6dc745f23c4 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 15 Nov 2022 11:32:30 +0000
Subject: [PATCH 121/222] start annulus

need tests
---
 Project.toml             |   2 +-
 docs/src/dev.md          |  12 ++--
 examples/annulus.jl      |  73 ++++++++++++++++++++++
 src/FastTransforms.jl    |   6 +-
 src/libfasttransforms.jl | 127 ++++++++++++++++++++++++++++++++++-----
 5 files changed, 196 insertions(+), 24 deletions(-)
 create mode 100644 examples/annulus.jl

diff --git a/Project.toml b/Project.toml
index cbdeb56e..58cbad27 100644
--- a/Project.toml
+++ b/Project.toml
@@ -20,7 +20,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 AbstractFFTs = "1.0"
 FFTW = "1"
 FastGaussQuadrature = "0.4, 0.5"
-FastTransforms_jll = "0.6.0"
+FastTransforms_jll = "0.6.1"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
 GenericFFT = "0.1"
 Reexport = "0.2, 1.0"
diff --git a/docs/src/dev.md b/docs/src/dev.md
index 2335abd7..e45211b7 100644
--- a/docs/src/dev.md
+++ b/docs/src/dev.md
@@ -37,13 +37,13 @@ This lets the developer experiment with new features through `ccall`ing into ble
 To get from a C library release to a Julia package release, the developer needs to update Yggdrasil's [build_tarballs.jl](https://github.com/JuliaPackaging/Yggdrasil/blob/master/F/FastTransforms/build_tarballs.jl) script for the new version and its 256-bit SHA. On macOS, the SHA can be found by:
 
 ```julia
-shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.6.0 --output FastTransforms.tar.gz
+shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.6.1 --output FastTransforms.tar.gz
   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                  Dload  Upload   Total   Spent    Left  Speed
-100  162k    0  162k    0     0   252k      0 --:--:-- --:--:-- --:--:--  252k
+100  168k    0  168k    0     0   429k      0 --:--:-- --:--:-- --:--:--  429k
 
 shell> shasum -a 256 FastTransforms.tar.gz
-ae2db2fa808ca17c5dc5ac25b079eba2dbe598d061b9b4e14c948680870abc3c  FastTransforms.tar.gz
+4ee42f264626b335e3f8bed7a10935d54393589813d8558802f0eae9ca46d36e  FastTransforms.tar.gz
 
 shell> rm -f FastTransforms.tar.gz
 
@@ -52,17 +52,17 @@ shell> rm -f FastTransforms.tar.gz
 Using [SHA.jl](https://github.com/JuliaCrypto/SHA.jl), the SHA can also be found by:
 
 ```julia
-shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.6.0 --output FastTransforms.tar.gz
+shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.6.1 --output FastTransforms.tar.gz
   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                  Dload  Upload   Total   Spent    Left  Speed
-100  156k    0  156k    0     0   443k      0 --:--:-- --:--:-- --:--:--  443k
+100  168k    0  168k    0     0   442k      0 --:--:-- --:--:-- --:--:--  443k
 
 julia> using SHA
 
 julia> open("FastTransforms.tar.gz") do f
            bytes2hex(sha256(f))
        end
-"ae2db2fa808ca17c5dc5ac25b079eba2dbe598d061b9b4e14c948680870abc3c"
+"4ee42f264626b335e3f8bed7a10935d54393589813d8558802f0eae9ca46d36e"
 
 shell> rm -f FastTransforms.tar.gz
 
diff --git a/examples/annulus.jl b/examples/annulus.jl
new file mode 100644
index 00000000..4feb5b45
--- /dev/null
+++ b/examples/annulus.jl
@@ -0,0 +1,73 @@
+# # Integration on an annulus
+# In this example, we explore integration of the function:
+# ```math
+#   f(x,y) = \frac{x^3}{x^2+y^2-\frac{1}{4}},
+# ```
+# over the annulus defined by $\{(r,\theta) : \frac{2}{3} < r < 1, 0 < \theta < 2\pi\}$.
+# We will calculate the integral:
+# ```math
+#   \int_0^{2\pi}\int_{\frac{2}{3}}^1 f(r\cos\theta,r\sin\theta)^2r{\rm\,d}r{\rm\,d}\theta,
+# ```
+# by analyzing the function in an annulus polynomial series.
+# With $\rho = \frac{2}{3}$ denoting the inner radius, we analyze the function on an $N\times M$ tensor product grid defined by:
+# ```math
+# \begin{aligned}
+# r_n & = \sqrt{\cos^2\left[(n+\tfrac{1}{2})\pi/2N\right] + \rho^2 \sin^2\left[(n+\tfrac{1}{2})\pi/2N\right]},\quad{\rm for}\quad 0\le n < N,\quad{\rm and}\\
+# \theta_m & = 2\pi m/M,\quad{\rm for}\quad 0\le m < M;
+# \end{aligned}
+# ```
+# we convert the function samples to Chebyshev×Fourier coefficients using
+# `plan_annulus_analysis`; and finally, we transform the Chebyshev×Fourier
+# coefficients to annulus polynomial coefficients using `plan_ann2cxf`.
+#
+# For the storage pattern of the arrays, please consult the
+# [documentation](https://MikaelSlevinsky.github.io/FastTransforms).
+
+using FastTransforms, LinearAlgebra, Plots
+const GENFIGS = joinpath(pkgdir(FastTransforms), "docs/src/generated")
+!isdir(GENFIGS) && mkdir(GENFIGS)
+plotlyjs()
+
+# Our function $f$ on the annulus:
+f = (x,y) -> x^3/(x^2+y^2-1/4)
+
+# The annulus polynomial degree:
+N = 8
+M = 4N-3
+
+# The annulus inner radius:
+ρ = 2/3
+
+# The radial grid:
+r = [begin t = (N-n-0.5)/(2N); ct2 = sinpi(t); st2 = cospi(t); sqrt(ct2^2+ρ^2*st2^2) end for n in 0:N-1]
+
+# The angular grid (mod $\pi$):
+θ = (0:M-1)*2/M
+
+# On the mapped tensor product grid, our function samples are:
+F = [f(r*cospi(θ), r*sinpi(θ)) for r in r, θ in θ]
+
+# We superpose a surface plot of $f$ on top of the grid:
+X = [r*cospi(θ) for r in r, θ in θ]
+Y = [r*sinpi(θ) for r in r, θ in θ]
+scatter3d(vec(X), vec(Y), vec(0F); markersize=0.75, markercolor=:red)
+surface!(X, Y, F; legend=false, xlabel="x", ylabel="y", zlabel="f")
+savefig(joinpath(GENFIGS, "annulus.html"))
+###```@raw html
+###<object type="text/html" data="../annulus.html" style="width:100%;height:400px;"></object>
+###```
+
+# We precompute an Annulus--Chebyshev×Fourier plan:
+α, β, γ = 0, 0, 0
+P = plan_ann2cxf(F, α, β, γ, ρ)
+
+# And an FFTW Chebyshev×Fourier analysis plan on the annulus:
+PA = plan_annulus_analysis(F, ρ)
+
+# Its annulus coefficients are:
+U = P\(PA*F)
+
+# The annulus coefficients are useful for integration.
+# The integral of $[f(x,y)]^2$ over the annulus is
+# approximately the square of the 2-norm of the coefficients:
+norm(U)^2, 5π/8*(1675/4536+9*log(3)/32-3*log(7)/32)
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 20740761..6e863f63 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -34,8 +34,9 @@ export leg2cheb, cheb2leg, ultra2ultra, jac2jac,
        lag2lag, jac2ultra, ultra2jac, jac2cheb,
        cheb2jac, ultra2cheb, cheb2ultra, associatedjac2jac,
        modifiedjac2jac, modifiedlag2lag, modifiedherm2herm,
-       sph2fourier, sphv2fourier, disk2cxf, rectdisk2cheb, tri2cheb, tet2cheb,
-       fourier2sph, fourier2sphv, cxf2disk, cheb2rectdisk, cheb2tri, cheb2tet
+       sph2fourier, sphv2fourier, disk2cxf, ann2cxf, rectdisk2cheb,
+       tri2cheb, tet2cheb,fourier2sph, fourier2sphv, cxf2disk, cxf2ann,
+       cheb2rectdisk, cheb2tri, cheb2tet
 
 export plan_leg2cheb, plan_cheb2leg, plan_ultra2ultra, plan_jac2jac,
        plan_lag2lag, plan_jac2ultra, plan_ultra2jac, plan_jac2cheb,
@@ -44,6 +45,7 @@ export plan_leg2cheb, plan_cheb2leg, plan_ultra2ultra, plan_jac2jac,
        plan_sph2fourier, plan_sph_synthesis, plan_sph_analysis,
        plan_sphv2fourier, plan_sphv_synthesis, plan_sphv_analysis,
        plan_disk2cxf, plan_disk_synthesis, plan_disk_analysis,
+       plan_ann2cxf, plan_annulus_synthesis, plan_annulus_analysis,
        plan_rectdisk2cheb, plan_rectdisk_synthesis, plan_rectdisk_analysis,
        plan_tri2cheb, plan_tri_synthesis, plan_tri_analysis,
        plan_tet2cheb, plan_tet_synthesis, plan_tet_analysis,
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 7f71e0a1..00e2e8db 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -122,6 +122,7 @@ end
     SPHERE
     SPHEREV
     DISK
+    ANNULUS
     RECTDISK
     TRIANGLE
     TETRAHEDRON
@@ -132,6 +133,8 @@ end
     SPHEREVANALYSIS
     DISKSYNTHESIS
     DISKANALYSIS
+    ANNULUSSYNTHESIS
+    ANNULUSANALYSIS
     RECTDISKSYNTHESIS
     RECTDISKANALYSIS
     TRIANGLESYNTHESIS
@@ -163,6 +166,7 @@ let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                SPHERE               => "Spherical harmonic--Fourier",
                SPHEREV              => "Spherical vector field--Fourier",
                DISK                 => "Zernike--Chebyshev×Fourier",
+               ANNULUS              => "Annulus--Chebyshev×Fourier",
                RECTDISK             => "Dunkl-Xu--Chebyshev²",
                TRIANGLE             => "Proriol--Chebyshev²",
                TETRAHEDRON          => "Proriol--Chebyshev³",
@@ -173,6 +177,8 @@ let k2s = Dict(LEG2CHEB             => "Legendre--Chebyshev",
                SPHEREVANALYSIS      => "FFTW Fourier analysis on the sphere (vector field)",
                DISKSYNTHESIS        => "FFTW Chebyshev×Fourier synthesis on the disk",
                DISKANALYSIS         => "FFTW Chebyshev×Fourier analysis on the disk",
+               ANNULUSSYNTHESIS     => "FFTW Chebyshev×Fourier synthesis on the annulus",
+               ANNULUSANALYSIS      => "FFTW Chebyshev×Fourier analysis on the annulus",
                RECTDISKSYNTHESIS    => "FFTW Chebyshev synthesis on the rectangularized disk",
                RECTDISKANALYSIS     => "FFTW Chebyshev analysis on the rectangularized disk",
                TRIANGLESYNTHESIS    => "FFTW Chebyshev synthesis on the triangle",
@@ -216,6 +222,7 @@ show(io::IO, p::FTPlan{T, 1, K}) where {T, K} = print(io, "FastTransforms ", kin
 show(io::IO, p::FTPlan{T, 2, SPHERE}) where T = print(io, "FastTransforms ", kind2string(SPHERE), " plan for $(p.n)×$(2p.n-1)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, SPHEREV}) where T = print(io, "FastTransforms ", kind2string(SPHEREV), " plan for $(p.n)×$(2p.n-1)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, DISK}) where T = print(io, "FastTransforms ", kind2string(DISK), " plan for $(p.n)×$(4p.n-3)-element array of ", T)
+show(io::IO, p::FTPlan{T, 2, ANNULUS}) where T = print(io, "FastTransforms ", kind2string(ANNULUS), " plan for $(p.n)×$(4p.n-3)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, RECTDISK}) where T = print(io, "FastTransforms ", kind2string(RECTDISK), " plan for $(p.n)×$(p.n)-element array of ", T)
 show(io::IO, p::FTPlan{T, 2, TRIANGLE}) where T = print(io, "FastTransforms ", kind2string(TRIANGLE), " plan for $(p.n)×$(p.n)-element array of ", T)
 show(io::IO, p::FTPlan{T, 3, TETRAHEDRON}) where T = print(io, "FastTransforms ", kind2string(TETRAHEDRON), " plan for $(p.n)×$(p.n)×$(p.n)-element array of ", T)
@@ -244,7 +251,7 @@ for (N, K) in ((2, RECTDISK), (2, TRIANGLE), (3, TETRAHEDRON))
     end
 end
 
-for K in (SPHERE, SPHEREV, DISK, SPINSPHERE)
+for K in (SPHERE, SPHEREV, DISK, ANNULUS, SPINSPHERE)
     @eval function checksize(p::FTPlan{T, 2, $K}, x::Matrix{T}) where T
         if p.n != size(x, 1)
             throw(DimensionMismatch("FTPlan has dimensions $(p.n) × $(p.n), x has leading dimension $(size(x, 1))"))
@@ -281,8 +288,11 @@ destroy_plan(p::FTPlan{Float64, 1}) = ccall((:ft_destroy_tb_eigen_FMM, libfasttr
 destroy_plan(p::FTPlan{BigFloat, 1}) = ccall((:ft_mpfr_destroy_plan, libfasttransforms), Cvoid, (Ptr{mpfr_t}, Cint), p, p.n)
 destroy_plan(p::FTPlan{Float32, 1, ASSOCIATEDJAC2JAC}) = ccall((:ft_destroy_btb_eigen_FMMf, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 1, ASSOCIATEDJAC2JAC}) = ccall((:ft_destroy_btb_eigen_FMM, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float32, 1, MODIFIEDJAC2JAC}) = ccall((:ft_destroy_modified_planf, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 1, MODIFIEDJAC2JAC}) = ccall((:ft_destroy_modified_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float32, 1, MODIFIEDLAG2LAG}) = ccall((:ft_destroy_modified_planf, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 1, MODIFIEDLAG2LAG}) = ccall((:ft_destroy_modified_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float32, 1, MODIFIEDHERM2HERM}) = ccall((:ft_destroy_modified_planf, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 1, MODIFIEDHERM2HERM}) = ccall((:ft_destroy_modified_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64}) = ccall((:ft_destroy_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Complex{Float64}, 2, SPINSPHERE}) = ccall((:ft_destroy_spin_harmonic_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
@@ -292,6 +302,8 @@ destroy_plan(p::FTPlan{Float64, 2, SPHEREVSYNTHESIS}) = ccall((:ft_destroy_spher
 destroy_plan(p::FTPlan{Float64, 2, SPHEREVANALYSIS}) = ccall((:ft_destroy_sphere_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, DISKSYNTHESIS}) = ccall((:ft_destroy_disk_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, DISKANALYSIS}) = ccall((:ft_destroy_disk_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float64, 2, ANNULUSSYNTHESIS}) = ccall((:ft_destroy_annulus_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
+destroy_plan(p::FTPlan{Float64, 2, ANNULUSANALYSIS}) = ccall((:ft_destroy_annulus_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, RECTDISKSYNTHESIS}) = ccall((:ft_destroy_rectdisk_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, RECTDISKANALYSIS}) = ccall((:ft_destroy_rectdisk_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
 destroy_plan(p::FTPlan{Float64, 2, TRIANGLESYNTHESIS}) = ccall((:ft_destroy_triangle_fftw_plan, libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, ), p)
@@ -420,7 +432,7 @@ for f in (:leg2cheb, :cheb2leg, :ultra2ultra, :jac2jac,
           :lag2lag, :jac2ultra, :ultra2jac, :jac2cheb,
           :cheb2jac, :ultra2cheb, :cheb2ultra, :associatedjac2jac,
           :modifiedjac2jac, :modifiedlag2lag, :modifiedherm2herm,
-          :sph2fourier, :sphv2fourier, :disk2cxf,
+          :sph2fourier, :sphv2fourier, :disk2cxf, :ann2cxf,
           :rectdisk2cheb, :tri2cheb, :tet2cheb)
     plan_f = Symbol("plan_", f)
     @eval begin
@@ -431,8 +443,9 @@ for f in (:leg2cheb, :cheb2leg, :ultra2ultra, :jac2jac,
 end
 
 for (f, plan_f) in ((:fourier2sph, :plan_sph2fourier), (:fourier2sphv, :plan_sphv2fourier),
-                    (:cxf2disk, :plan_disk2cxf), (:cheb2rectdisk, :plan_rectdisk2cheb),
-                    (:cheb2tri, :plan_tri2cheb), (:cheb2tet, :plan_tet2cheb))
+                    (:cxf2disk, :plan_disk2cxf), (:cxf2ann, :plan_ann2cxf),
+                    (:cheb2rectdisk, :plan_rectdisk2cheb), (:cheb2tri, :plan_tri2cheb),
+                    (:cheb2tet, :plan_tet2cheb))
     @eval begin
         $f(x::AbstractArray, y...; z...) = $plan_f(x, y...; z...)\x
     end
@@ -502,6 +515,36 @@ function plan_associatedjac2jac(::Type{Float32}, n::Integer, c::Integer, α, β,
     return FTPlan{Float32, 1, ASSOCIATEDJAC2JAC}(plan, n)
 end
 
+function plan_modifiedjac2jac(::Type{Float32}, n::Integer, α, β, u::Vector{Float32}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_jacobi_to_jacobif, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float32, Float32, Cint, Ptr{Float32}, Cint, Ptr{Float32}, Cint), n, α, β, length(u), u, 0, C_NULL, verbose)
+    return FTPlan{Float32, 1, MODIFIEDJAC2JAC}(plan, n)
+end
+
+function plan_modifiedjac2jac(::Type{Float32}, n::Integer, α, β, u::Vector{Float32}, v::Vector{Float32}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_jacobi_to_jacobif, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float32, Float32, Cint, Ptr{Float32}, Cint, Ptr{Float32}, Cint), n, α, β, length(u), u, length(v), v, verbose)
+    return FTPlan{Float32, 1, MODIFIEDJAC2JAC}(plan, n)
+end
+
+function plan_modifiedlag2lag(::Type{Float32}, n::Integer, α, u::Vector{Float32}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_laguerre_to_laguerref, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float32, Cint, Ptr{Float32}, Cint, Ptr{Float32}, Cint), n, α, length(u), u, 0, C_NULL, verbose)
+    return FTPlan{Float32, 1, MODIFIEDLAG2LAG}(plan, n)
+end
+
+function plan_modifiedlag2lag(::Type{Float32}, n::Integer, α, u::Vector{Float32}, v::Vector{Float32}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_laguerre_to_laguerref, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float32, Cint, Ptr{Float32}, Cint, Ptr{Float32}, Cint), n, α, length(u), u, length(v), v, verbose)
+    return FTPlan{Float32, 1, MODIFIEDLAG2LAG}(plan, n)
+end
+
+function plan_modifiedherm2herm(::Type{Float32}, n::Integer, u::Vector{Float32}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_hermite_to_hermitef, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Ptr{Float32}, Cint, Ptr{Float32}, Cint), n, length(u), u, 0, C_NULL, verbose)
+    return FTPlan{Float32, 1, MODIFIEDHERM2HERM}(plan, n)
+end
+
+function plan_modifiedherm2herm(::Type{Float32}, n::Integer, u::Vector{Float32}, v::Vector{Float32}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_hermite_to_hermitef, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Ptr{Float32}, Cint, Ptr{Float32}, Cint), n, length(u), u, length(v), v, verbose)
+    return FTPlan{Float32, 1, MODIFIEDHERM2HERM}(plan, n)
+end
+
 
 function plan_leg2cheb(::Type{Float64}, n::Integer; normleg::Bool=false, normcheb::Bool=false)
     plan = ccall((:ft_plan_legendre_to_chebyshev, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Cint), normleg, normcheb, n)
@@ -563,9 +606,8 @@ function plan_associatedjac2jac(::Type{Float64}, n::Integer, c::Integer, α, β,
     return FTPlan{Float64, 1, ASSOCIATEDJAC2JAC}(plan, n)
 end
 
-function plan_modifiedjac2jac(::Type{Float64}, n::Integer, α, β, w::Vector{Float64}; verbose::Bool=false)
-    #plan_modifiedjac2jac(Float64, n, α, β, w, Vector{Float64}(undef, 0); verbose=verbose)
-    plan = ccall((:ft_plan_modified_jacobi_to_jacobi, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Float64, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, α, β, length(w), w, 0, C_NULL, verbose)
+function plan_modifiedjac2jac(::Type{Float64}, n::Integer, α, β, u::Vector{Float64}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_jacobi_to_jacobi, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Float64, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, α, β, length(u), u, 0, C_NULL, verbose)
     return FTPlan{Float64, 1, MODIFIEDJAC2JAC}(plan, n)
 end
 
@@ -574,8 +616,8 @@ function plan_modifiedjac2jac(::Type{Float64}, n::Integer, α, β, u::Vector{Flo
     return FTPlan{Float64, 1, MODIFIEDJAC2JAC}(plan, n)
 end
 
-function plan_modifiedlag2lag(::Type{Float64}, n::Integer, α, w::Vector{Float64}; verbose::Bool=false)
-    plan = ccall((:ft_plan_modified_laguerre_to_laguerre, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, α, length(w), w, 0, C_NULL, verbose)
+function plan_modifiedlag2lag(::Type{Float64}, n::Integer, α, u::Vector{Float64}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_laguerre_to_laguerre, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, α, length(u), u, 0, C_NULL, verbose)
     return FTPlan{Float64, 1, MODIFIEDLAG2LAG}(plan, n)
 end
 
@@ -584,8 +626,8 @@ function plan_modifiedlag2lag(::Type{Float64}, n::Integer, α, u::Vector{Float64
     return FTPlan{Float64, 1, MODIFIEDLAG2LAG}(plan, n)
 end
 
-function plan_modifiedherm2herm(::Type{Float64}, n::Integer, w::Vector{Float64}; verbose::Bool=false)
-    plan = ccall((:ft_plan_modified_hermite_to_hermite, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, length(w), w, 0, C_NULL, verbose)
+function plan_modifiedherm2herm(::Type{Float64}, n::Integer, u::Vector{Float64}; verbose::Bool=false)
+    plan = ccall((:ft_plan_modified_hermite_to_hermite, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Ptr{Float64}, Cint, Ptr{Float64}, Cint), n, length(u), u, 0, C_NULL, verbose)
     return FTPlan{Float64, 1, MODIFIEDHERM2HERM}(plan, n)
 end
 
@@ -666,6 +708,11 @@ function plan_disk2cxf(::Type{Float64}, n::Integer, α, β)
     return FTPlan{Float64, 2, DISK}(plan, n)
 end
 
+function plan_ann2cxf(::Type{Float64}, n::Integer, α, β, γ, ρ)
+    plan = ccall((:ft_plan_ann2cxf, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64, Float64, Float64, Float64), n, α, β, γ, ρ)
+    return FTPlan{Float64, 2, ANNULUS}(plan, n)
+end
+
 function plan_rectdisk2cheb(::Type{Float64}, n::Integer, β)
     plan = ccall((:ft_plan_rectdisk2cheb, libfasttransforms), Ptr{ft_plan_struct}, (Cint, Float64), n, β)
     return FTPlan{Float64, 2, RECTDISK}(plan, n)
@@ -688,6 +735,10 @@ end
 
 plan_disk2cxf(::Type{Float64}, n::Integer, α) = plan_disk2cxf(Float64, n, α, 0)
 plan_disk2cxf(::Type{Float64}, n::Integer) = plan_disk2cxf(Float64, n, 0)
+plan_ann2cxf(::Type{Float64}, n::Integer, α, β, γ) = plan_ann2cxf(Float64, n, α, β, γ, 0)
+plan_ann2cxf(::Type{Float64}, n::Integer, α, β) = plan_disk2cxf(Float64, n, α, β)
+plan_ann2cxf(::Type{Float64}, n::Integer, α) = plan_disk2cxf(Float64, n, α)
+plan_ann2cxf(::Type{Float64}, n::Integer) = plan_disk2cxf(Float64, n)
 plan_rectdisk2cheb(::Type{Float64}, n::Integer) = plan_rectdisk2cheb(Float64, n, 0)
 plan_tri2cheb(::Type{Float64}, n::Integer, α, β) = plan_tri2cheb(Float64, n, α, β, 0)
 plan_tri2cheb(::Type{Float64}, n::Integer, α) = plan_tri2cheb(Float64, n, α, 0)
@@ -734,6 +785,35 @@ for (fJ, fadJ, fC, fE, K) in ((:plan_sph_synthesis, :plan_sph_analysis, :ft_plan
     end
 end
 
+for (fJ, fadJ, fC, fE, K) in ((:plan_annulus_synthesis, :plan_annulus_analysis, :ft_plan_annulus_synthesis, :ft_execute_annulus_synthesis, ANNULUSSYNTHESIS),
+                              (:plan_annulus_analysis, :plan_annulus_synthesis, :ft_plan_annulus_analysis, :ft_execute_annulus_analysis, ANNULUSANALYSIS))
+    @eval begin
+        $fJ(x::Matrix{T}, ρ; y...) where T = $fJ(T, size(x, 1), size(x, 2), ρ; y...)
+        $fJ(::Type{Complex{T}}, x...; y...) where T <: Real = $fJ(T, x...; y...)
+        function $fJ(::Type{Float64}, n::Integer, m::Integer, ρ; flags::Integer=FFTW.ESTIMATE)
+            plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Float64, Cuint), n, m, ρ, flags)
+            return FTPlan{Float64, 2, $K}(plan, n, m)
+        end
+        adjoint(p::FTPlan{T, 2, $K}) where T = AdjointFTPlan(p, $fadJ(T, p.n, p.m, ρ)) # NOTE(review): ρ is not an argument of adjoint and is not bound in the visible code; presumably throws UndefVarError when called — TODO recover ρ from the plan
+        transpose(p::FTPlan{T, 2, $K}) where T = TransposeFTPlan(p, $fadJ(T, p.n, p.m, ρ)) # NOTE(review): ρ is not an argument of transpose and is not bound in the visible code; presumably throws UndefVarError when called — TODO recover ρ from the plan
+        function lmul!(p::FTPlan{Float64, 2, $K}, x::Matrix{Float64})
+            checksize(p, x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function lmul!(p::AdjointFTPlan{Float64, FTPlan{Float64, 2, $K}}, x::Matrix{Float64})
+            checksize(p, x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+        function lmul!(p::TransposeFTPlan{Float64, FTPlan{Float64, 2, $K}}, x::Matrix{Float64})
+            checksize(p, x)
+            ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'T', p, x, size(x, 1), size(x, 2))
+            return x
+        end
+    end
+end
+
 for (fJ, fadJ, fC, fE, K) in ((:plan_tet_synthesis, :plan_tet_analysis, :ft_plan_tet_synthesis, :ft_execute_tet_synthesis, TETRAHEDRONSYNTHESIS),
                               (:plan_tet_analysis, :plan_tet_synthesis, :ft_plan_tet_analysis, :ft_execute_tet_analysis, TETRAHEDRONANALYSIS))
     @eval begin
@@ -873,8 +953,10 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bbbfmvf, :Float32),
     end
 end
 
-for (fJ, fC, elty) in ((:lmul!, :ft_mpmv, :Float64),
-                       (:ldiv!, :ft_mpsv, :Float64))
+for (fJ, fC, elty) in ((:lmul!, :ft_mpmvf, :Float32),
+                       (:ldiv!, :ft_mpsvf, :Float32),
+                       (:lmul!, :ft_mpmv , :Float64),
+                       (:ldiv!, :ft_mpsv , :Float64))
     @eval begin
         function $fJ(p::ModifiedFTPlan{$elty}, x::StridedVector{$elty})
             checksize(p, x)
@@ -971,8 +1053,10 @@ for (fJ, fC, elty) in ((:lmul!, :ft_bbbfmmf, :Float32),
     end
 end
 
-for (fJ, fC, elty) in ((:lmul!, :ft_mpmm, :Float64),
-                       (:ldiv!, :ft_mpsm, :Float64))
+for (fJ, fC, elty) in ((:lmul!, :ft_mpmmf, :Float32),
+                       (:ldiv!, :ft_mpsmf, :Float32),
+                       (:lmul!, :ft_mpmm , :Float64),
+                       (:ldiv!, :ft_mpsm , :Float64))
     @eval begin
         function $fJ(p::ModifiedFTPlan{$elty}, x::StridedMatrix{$elty})
             checksize(p, x)
@@ -1027,6 +1111,8 @@ for (fJ, fC, T, N, K) in ((:lmul!, :ft_execute_sph2fourier, Float64, 2, SPHERE),
                           (:ldiv!, :ft_execute_fourier2spinsph, Complex{Float64}, 2, SPINSPHERE),
                           (:lmul!, :ft_execute_disk2cxf, Float64, 2, DISK),
                           (:ldiv!, :ft_execute_cxf2disk, Float64, 2, DISK),
+                          (:lmul!, :ft_execute_ann2cxf, Float64, 2, ANNULUS),
+                          (:ldiv!, :ft_execute_cxf2ann, Float64, 2, ANNULUS),
                           (:lmul!, :ft_execute_rectdisk2cheb, Float64, 2, RECTDISK),
                           (:ldiv!, :ft_execute_cheb2rectdisk, Float64, 2, RECTDISK),
                           (:lmul!, :ft_execute_tri2cheb, Float64, 2, TRIANGLE),
@@ -1149,3 +1235,14 @@ for fJ in (:lmul!, :ldiv!)
         end
     end
 end
+
+for (fC, T) in ((:execute_jacobi_similarityf, Float32), (:execute_jacobi_similarity, Float64))
+    @eval begin
+        function modified_jacobi_matrix(P::ModifiedFTPlan{$T}, XP::SymTridiagonal{$T, Vector{$T}})
+            n = min(P.n, size(XP, 1))
+            XQ = SymTridiagonal(Vector{$T}(undef, n-1), Vector{$T}(undef, n-2))
+            ccall(($(string(fC)), libfasttransforms), Cvoid, (Ptr{ft_plan_struct}, Cint, Ptr{$T}, Ptr{$T}, Ptr{$T}, Ptr{$T}), P, n, XP.dv, XP.ev, XQ.dv, XQ.ev)
+            return XQ
+        end
+    end
+end

From 6a67286000d91678be67945a666f257e95f26d92 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 15 Nov 2022 11:38:24 +0000
Subject: [PATCH 122/222] actually generate annulus.jl

---
 docs/make.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/make.jl b/docs/make.jl
index bd40d517..77842971 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -6,6 +6,7 @@ const EXAMPLES_DIR = joinpath(@__DIR__, "..", "examples")
 const OUTPUT_DIR   = joinpath(@__DIR__, "src/generated")
 
 examples = [
+    "annulus.jl",
     "automaticdifferentiation.jl",
     "chebyshev.jl",
     "disk.jl",
@@ -38,6 +39,7 @@ makedocs(
                     "Home" => "index.md",
                     "Development" => "dev.md",
                     "Examples" => [
+                        "generated/annulus.md",
                         "generated/automaticdifferentiation.md",
                         "generated/chebyshev.md",
                         "generated/disk.md",

From cd41a88b71bfddcba8a2fe61e8b44c30358e5e9b Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 15 Nov 2022 11:48:11 +0000
Subject: [PATCH 123/222] Update annulus.jl

---
 examples/annulus.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/annulus.jl b/examples/annulus.jl
index 4feb5b45..b848cc86 100644
--- a/examples/annulus.jl
+++ b/examples/annulus.jl
@@ -3,8 +3,8 @@
 # ```math
 #   f(x,y) = \frac{x^3}{x^2+y^2-\frac{1}{4}},
 # ```
-# over the annulus defined by $\{(r,\theta) : \frac{2}{3} < r < 1, 0 < \theta < 2\pi\}$.
-# We will calculate the integral:
+# over the annulus defined by $\{(r,\theta) : \rho < r < 1, 0 < \theta < 2\pi\}$
+# with parameter $\rho = \frac{2}{3}$. We will calculate the integral:
 # ```math
 #   \int_0^{2\pi}\int_{\frac{2}{3}}^1 f(r\cos\theta,r\sin\theta)^2r{\rm\,d}r{\rm\,d}\theta,
 # ```

From 4875af3cdf1e79d5a294a91256cfa9dca88e9902 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 15 Nov 2022 15:22:41 +0000
Subject: [PATCH 124/222] add tests

---
 Project.toml                   |  4 +--
 docs/src/dev.md                |  8 ++---
 src/libfasttransforms.jl       |  7 ++--
 test/libfasttransformstests.jl | 61 +++++++++++++++++++++-------------
 4 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/Project.toml b/Project.toml
index 58cbad27..0d6dc2e4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.7"
+version = "0.14.8"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -20,7 +20,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 AbstractFFTs = "1.0"
 FFTW = "1"
 FastGaussQuadrature = "0.4, 0.5"
-FastTransforms_jll = "0.6.1"
+FastTransforms_jll = "0.6.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
 GenericFFT = "0.1"
 Reexport = "0.2, 1.0"
diff --git a/docs/src/dev.md b/docs/src/dev.md
index e45211b7..d1aadc1e 100644
--- a/docs/src/dev.md
+++ b/docs/src/dev.md
@@ -37,13 +37,13 @@ This lets the developer experiment with new features through `ccall`ing into ble
 To get from a C library release to a Julia package release, the developer needs to update Yggdrasil's [build_tarballs.jl](https://github.com/JuliaPackaging/Yggdrasil/blob/master/F/FastTransforms/build_tarballs.jl) script for the new version and its 256-bit SHA. On macOS, the SHA can be found by:
 
 ```julia
-shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.6.1 --output FastTransforms.tar.gz
+shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.6.2 --output FastTransforms.tar.gz
   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                  Dload  Upload   Total   Spent    Left  Speed
 100  168k    0  168k    0     0   429k      0 --:--:-- --:--:-- --:--:--  429k
 
 shell> shasum -a 256 FastTransforms.tar.gz
-4ee42f264626b335e3f8bed7a10935d54393589813d8558802f0eae9ca46d36e  FastTransforms.tar.gz
+fd00befcb0c20ba962a8744a7b9139355071ee95be70420de005b7c0f6e023aa  FastTransforms.tar.gz
 
 shell> rm -f FastTransforms.tar.gz
 
@@ -52,7 +52,7 @@ shell> rm -f FastTransforms.tar.gz
 Using [SHA.jl](https://github.com/JuliaCrypto/SHA.jl), the SHA can also be found by:
 
 ```julia
-shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.6.1 --output FastTransforms.tar.gz
+shell> curl https://codeload.github.com/MikaelSlevinsky/FastTransforms/tar.gz/v0.6.2 --output FastTransforms.tar.gz
   % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                  Dload  Upload   Total   Spent    Left  Speed
 100  168k    0  168k    0     0   442k      0 --:--:-- --:--:-- --:--:--  443k
@@ -62,7 +62,7 @@ julia> using SHA
 julia> open("FastTransforms.tar.gz") do f
            bytes2hex(sha256(f))
        end
-"4ee42f264626b335e3f8bed7a10935d54393589813d8558802f0eae9ca46d36e"
+"fd00befcb0c20ba962a8744a7b9139355071ee95be70420de005b7c0f6e023aa"
 
 shell> rm -f FastTransforms.tar.gz
 
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index 00e2e8db..dd42b4ce 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -785,6 +785,9 @@ for (fJ, fadJ, fC, fE, K) in ((:plan_sph_synthesis, :plan_sph_analysis, :ft_plan
     end
 end
 
+ft_get_rho_annulus_fftw_plan(p::FTPlan{Float64, 2, ANNULUSSYNTHESIS}) = ccall((:ft_get_rho_annulus_fftw_plan, libfasttransforms), Float64, (Ptr{ft_plan_struct}, ), p)
+ft_get_rho_annulus_fftw_plan(p::FTPlan{Float64, 2, ANNULUSANALYSIS}) = ccall((:ft_get_rho_annulus_fftw_plan, libfasttransforms), Float64, (Ptr{ft_plan_struct}, ), p)
+
 for (fJ, fadJ, fC, fE, K) in ((:plan_annulus_synthesis, :plan_annulus_analysis, :ft_plan_annulus_synthesis, :ft_execute_annulus_synthesis, ANNULUSSYNTHESIS),
                               (:plan_annulus_analysis, :plan_annulus_synthesis, :ft_plan_annulus_analysis, :ft_execute_annulus_analysis, ANNULUSANALYSIS))
     @eval begin
@@ -794,8 +797,8 @@ for (fJ, fadJ, fC, fE, K) in ((:plan_annulus_synthesis, :plan_annulus_analysis,
             plan = ccall(($(string(fC)), libfasttransforms), Ptr{ft_plan_struct}, (Cint, Cint, Float64, Cuint), n, m, ρ, flags)
             return FTPlan{Float64, 2, $K}(plan, n, m)
         end
-        adjoint(p::FTPlan{T, 2, $K}) where T = AdjointFTPlan(p, $fadJ(T, p.n, p.m, ρ))
-        transpose(p::FTPlan{T, 2, $K}) where T = TransposeFTPlan(p, $fadJ(T, p.n, p.m, ρ))
+        adjoint(p::FTPlan{T, 2, $K}) where T = AdjointFTPlan(p, $fadJ(T, p.n, p.m, ft_get_rho_annulus_fftw_plan(p)))
+        transpose(p::FTPlan{T, 2, $K}) where T = TransposeFTPlan(p, $fadJ(T, p.n, p.m, ft_get_rho_annulus_fftw_plan(p)))
         function lmul!(p::FTPlan{Float64, 2, $K}, x::Matrix{Float64})
             checksize(p, x)
             ccall(($(string(fE)), libfasttransforms), Cvoid, (Cint, Ptr{ft_plan_struct}, Ptr{Float64}, Cint, Cint), 'N', p, x, size(x, 1), size(x, 2))
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 5c80aaad..e224666d 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -23,7 +23,7 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         @test f ≈ fd
     end
 
-    α, β, γ, δ, λ, μ = 0.1, 0.2, 0.3, 0.4, 0.5, 0.6
+    α, β, γ, δ, λ, μ, ρ = 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7
     function test_1d_plans(p1, p2, x)
         y = p1*x
         z = p2*y
@@ -104,29 +104,31 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
 
     @testset "Modified classical orthonormal polynomial transforms" begin
         (n, α, β) = (16, 0, 0)
-        P1 = plan_modifiedjac2jac(Float64, n, α, β, [0.9428090415820636, -0.32659863237109055, -0.42163702135578396, 0.2138089935299396]) # u1(x) = (1-x)^2*(1+x)
-        P2 = plan_modifiedjac2jac(Float64, n, α, β, [0.9428090415820636, -0.32659863237109055, -0.42163702135578396, 0.2138089935299396], [1.4142135623730951]) # u2(x) = (1-x)^2*(1+x)
-        P3 = plan_modifiedjac2jac(Float64, n, α, β, [-0.9428090415820636, 0.32659863237109055, 0.42163702135578396, -0.2138089935299396], [-5.185449728701348, 0.0, 0.42163702135578374]) # u3(x) = -(1-x)^2*(1+x), v3(x) = -(2-x)*(2+x)
-        P4 = plan_modifiedjac2jac(Float64, n, α+2, β+1, [1.1547005383792517], [4.387862045841156, 0.1319657758147716, -0.20865621238292037]) # v4(x) = (2-x)*(2+x)
-
-        @test P1*I ≈ P2*I
-        @test P1\I ≈ P2\I
-        @test P3*I ≈ P2*(P4*I)
-        @test P3\I ≈ P4\(P2\I)
-
-        P5 = plan_modifiedlag2lag(Float64, n, α, [2.0, -4.0, 2.0]) # u5(x) = x^2
-        P6 = plan_modifiedlag2lag(Float64, n, α, [2.0, -4.0, 2.0], [1.0]) # u6(x) = x^2
-        P7 = plan_modifiedlag2lag(Float64, n, α, [2.0, -4.0, 2.0], [7.0, -7.0, 2.0]) # u7(x) = x^2, v7(x) = (1+x)*(2+x)
-        P8 = plan_modifiedlag2lag(Float64, n, α+2, [sqrt(2.0)], [sqrt(1058.0), -sqrt(726.0), sqrt(48.0)]) # v8(x) = (1+x)*(2+x)
-
-        @test P5*I ≈ P6*I
-        @test P5\I ≈ P6\I
-        @test P7*I ≈ P6*(P8*I)
-        @test P7\I ≈ P8\(P6\I)
-
-        P9 = plan_modifiedherm2herm(Float64, n, [2.995504568550877, 0.0, 3.7655850551068593, 0.0, 1.6305461589167827], [2.995504568550877, 0.0, 3.7655850551068593, 0.0, 1.6305461589167827]) # u9(x) = 1+x^2+x^4, v9(x) = 1+x^2+x^4
-
-        @test P9*I ≈ P9\I
+        for T in (Float32, Float64)
+            P1 = plan_modifiedjac2jac(T, n, α, β, T[0.9428090415820636, -0.32659863237109055, -0.42163702135578396, 0.2138089935299396]) # u1(x) = (1-x)^2*(1+x)
+            P2 = plan_modifiedjac2jac(T, n, α, β, T[0.9428090415820636, -0.32659863237109055, -0.42163702135578396, 0.2138089935299396], T[1.4142135623730951]) # u2(x) = (1-x)^2*(1+x)
+            P3 = plan_modifiedjac2jac(T, n, α, β, T[-0.9428090415820636, 0.32659863237109055, 0.42163702135578396, -0.2138089935299396], T[-5.185449728701348, 0.0, 0.42163702135578374]) # u3(x) = -(1-x)^2*(1+x), v3(x) = -(2-x)*(2+x)
+            P4 = plan_modifiedjac2jac(T, n, α+2, β+1, T[1.1547005383792517], T[4.387862045841156, 0.1319657758147716, -0.20865621238292037]) # v4(x) = (2-x)*(2+x)
+
+            @test P1*I ≈ P2*I
+            @test P1\I ≈ P2\I
+            @test P3*I ≈ P2*(P4*I)
+            @test P3\I ≈ P4\(P2\I)
+
+            P5 = plan_modifiedlag2lag(T, n, α, T[2.0, -4.0, 2.0]) # u5(x) = x^2
+            P6 = plan_modifiedlag2lag(T, n, α, T[2.0, -4.0, 2.0], T[1.0]) # u6(x) = x^2
+            P7 = plan_modifiedlag2lag(T, n, α, T[2.0, -4.0, 2.0], T[7.0, -7.0, 2.0]) # u7(x) = x^2, v7(x) = (1+x)*(2+x)
+            P8 = plan_modifiedlag2lag(T, n, α+2, T[sqrt(2.0)], T[sqrt(1058.0), -sqrt(726.0), sqrt(48.0)]) # v8(x) = (1+x)*(2+x)
+
+            @test P5*I ≈ P6*I
+            @test P5\I ≈ P6\I
+            @test isapprox(P7*I, P6*(P8*I); rtol = eps(T)^(1/4))
+            @test isapprox(P7\I, P8\(P6\I); rtol = eps(T)^(1/4))
+
+            P9 = plan_modifiedherm2herm(T, n, T[2.995504568550877, 0.0, 3.7655850551068593, 0.0, 1.6305461589167827], T[2.995504568550877, 0.0, 3.7655850551068593, 0.0, 1.6305461589167827]) # u9(x) = 1+x^2+x^4, v9(x) = 1+x^2+x^4
+
+            @test P9*I ≈ P9\I
+        end
     end
 
     function test_nd_plans(p, ps, pa, A)
@@ -175,6 +177,17 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
     pa = plan_disk_analysis(A)
     test_nd_plans(p, ps, pa, A)
 
+    A = diskones(Float64, n, 4n-3)
+    p = plan_ann2cxf(A, α, β, 0, ρ)
+    ps = plan_annulus_synthesis(A, ρ)
+    pa = plan_annulus_analysis(A, ρ)
+    test_nd_plans(p, ps, pa, A)
+    A = diskones(Float64, n, 4n-3) + im*diskones(Float64, n, 4n-3)
+    p = plan_ann2cxf(A, α, β, 0, ρ)
+    ps = plan_annulus_synthesis(A, ρ)
+    pa = plan_annulus_analysis(A, ρ)
+    test_nd_plans(p, ps, pa, A)
+
     A = rectdiskones(Float64, n, n)
     p = plan_rectdisk2cheb(A, β)
     ps = plan_rectdisk_synthesis(A)

From 32a6a0b5565548008b1aabfc0ed33aa1857f8b8c Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Tue, 15 Nov 2022 15:56:54 +0000
Subject: [PATCH 125/222] reduce notation burden

---
 examples/annulus.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/annulus.jl b/examples/annulus.jl
index b848cc86..73c19f40 100644
--- a/examples/annulus.jl
+++ b/examples/annulus.jl
@@ -39,7 +39,7 @@ M = 4N-3
 ρ = 2/3
 
 # The radial grid:
-r = [begin t = (N-n-0.5)/(2N); ct2 = sinpi(t); st2 = cospi(t); sqrt(ct2^2+ρ^2*st2^2) end; for n in 0:N-1]
+r = [begin t = (N-n-0.5)/(2N); ct = sinpi(t); st = cospi(t); sqrt(ct^2+ρ^2*st^2) end; for n in 0:N-1]
 
 # The angular grid (mod $\pi$):
 θ = (0:M-1)*2/M

From 45a1f9fb0272fc04740a6d1e2002bdd5f6371e89 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Wed, 14 Dec 2022 10:11:57 +0000
Subject: [PATCH 126/222] Add inv for 1st kind Chebyshev transforms (#190)

* add inv for 1st kind Chebyshev transforms

* v0.14.9

* add tests

* add tests

* test on 1
---
 .github/workflows/ci.yml  |  1 +
 Project.toml              |  2 +-
 src/FastTransforms.jl     |  2 +-
 src/chebyshevtransform.jl | 15 ++++++++++++++-
 test/chebyshevtests.jl    | 25 +++++++++++++++++++++++++
 5 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 169112d7..bd93db0d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,6 +10,7 @@ jobs:
       matrix:
         version:
           - '1.7'
+          - '1'
         os:
           - ubuntu-latest
           - macOS-latest
diff --git a/Project.toml b/Project.toml
index 0d6dc2e4..7a0a97e0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.8"
+version = "0.14.9"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 6e863f63..b7442ec5 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -17,7 +17,7 @@ import AbstractFFTs: Plan, ScaledPlan,
                      plan_fft, plan_ifft, plan_bfft, plan_fft!, plan_ifft!,
                      plan_bfft!, plan_rfft, plan_irfft, plan_brfft,
                      fftshift, ifftshift, rfft_output_size, brfft_output_size,
-                     plan_inv, normalization
+                     normalization
 
 import FFTW: dct, dct!, idct, idct!, plan_dct!, plan_idct!,
              plan_dct, plan_idct, fftwNumber
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 0cbcb84a..33f410ce 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -239,6 +239,10 @@ IChebyshevTransformPlan{T,kind,K}(F::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T
 inv(P::ChebyshevTransformPlan{T,2,K}) where {T,K} = IChebyshevTransformPlan{T,2,K}(P.plan)
 inv(P::IChebyshevTransformPlan{T,2,K}) where {T,K} = ChebyshevTransformPlan{T,2,K}(P.plan)
 
+inv(P::ChebyshevTransformPlan{T,1,K,inplace,N}) where {T,K,inplace,N} = IChebyshevTransformPlan{T,1,kindtuple(IFIRSTKIND,N,P.plan.region...)}(inv(P.plan).p)
+inv(P::IChebyshevTransformPlan{T,1,K,inplace,N}) where {T,K,inplace,N} = ChebyshevTransformPlan{T,1,kindtuple(FIRSTKIND,N,P.plan.region...)}(inv(P.plan).p)
+
+
 
 \(P::ChebyshevTransformPlan, x::AbstractArray) = inv(P) * x
 \(P::IChebyshevTransformPlan, x::AbstractArray) = inv(P) * x
@@ -526,13 +530,22 @@ function plan_ichebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws
 end
 function plan_ichebyshevutransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
     any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
-    IChebyshevUTransformPlan{T,2,kindtuple(USECONDKIND,N,dims...)}(FFTW.plan_r2r(x, USECONDKIND))
+    IChebyshevUTransformPlan{T,2,kindtuple(USECONDKIND,N,dims...)}(FFTW.plan_r2r(x, USECONDKIND, dims...; kws...))
 end
 
 
 plan_ichebyshevutransform!(x::AbstractArray, dims...; kws...) = plan_ichebyshevutransform!(x, Val(1), dims...; kws...)
 plan_ichebyshevutransform(x::AbstractArray, dims...; kws...) = plan_ichebyshevutransform(x, Val(1), dims...; kws...)
 
+# second kind Chebyshev transforms share a plan with their inverse
+# so we support this via inv
+inv(P::ChebyshevUTransformPlan{T,2,K}) where {T,K} = IChebyshevUTransformPlan{T,2,K}(P.plan)
+inv(P::IChebyshevUTransformPlan{T,2,K}) where {T,K} = ChebyshevUTransformPlan{T,2,K}(P.plan)
+
+inv(P::ChebyshevUTransformPlan{T,1,K,inplace,N}) where {T,K,inplace,N} = IChebyshevUTransformPlan{T,1,kindtuple(IUFIRSTKIND,N,P.plan.region...)}(inv(P.plan).p)
+inv(P::IChebyshevUTransformPlan{T,1,K,inplace,N}) where {T,K,inplace,N} = ChebyshevUTransformPlan{T,1,kindtuple(UFIRSTKIND,N,P.plan.region...)}(inv(P.plan).p)
+
+
 function _ichebyu1_postscale!(_, x::AbstractVector{T}) where T
     n = length(x)
     @inbounds for k=1:n # sqrt(1-x_j^2) weight
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index d98d07b4..58a93481 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -278,4 +278,29 @@ using FastTransforms, Test
         F = plan_chebyshevtransform([1.,2,3])
         @test chebyshevtransform(1.0:3) == F * (1:3)
     end
+
+    @testset "inv" begin
+        x = randn(5)
+        for F in (plan_chebyshevtransform(x), plan_chebyshevtransform(x, Val(2)),
+                  plan_chebyshevutransform(x), plan_chebyshevutransform(x, Val(2)),
+                  plan_ichebyshevtransform(x), plan_ichebyshevtransform(x, Val(2)),
+                  plan_ichebyshevutransform(x), plan_ichebyshevutransform(x, Val(2)))
+            @test F \ (F*x) ≈ F * (F\x) ≈ x
+        end
+
+        X = randn(5,4)
+        for F in (plan_chebyshevtransform(X,Val(1),1), plan_chebyshevtransform(X, Val(2),1),
+            plan_chebyshevtransform(X,Val(1),2), plan_chebyshevtransform(X, Val(2),2),
+            plan_ichebyshevtransform(X,Val(1),1), plan_ichebyshevtransform(X, Val(2),1),
+            plan_ichebyshevtransform(X,Val(1),2), plan_ichebyshevtransform(X, Val(2),2))
+            @test F \ (F*X) ≈ F * (F\X) ≈ X
+        end
+        # Matrix isn't implemented for chebyshevu
+        for F in (plan_chebyshevutransform(X,Val(1),1), plan_chebyshevutransform(X, Val(2),1),
+            plan_chebyshevutransform(X,Val(1),2), plan_chebyshevutransform(X, Val(2),2),
+            plan_ichebyshevutransform(X,Val(1),1), plan_ichebyshevutransform(X, Val(2),1),
+            plan_ichebyshevutransform(X,Val(1),2), plan_ichebyshevutransform(X, Val(2),2))
+            @test_broken F \ (F*X) ≈ F * (F\X) ≈ X
+        end
+    end
 end

From 0ea45398cdaa46aaea3ccaf15f73afc11be5cccf Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Thu, 9 Mar 2023 21:56:02 +0530
Subject: [PATCH 127/222] Temporary upper bound on FFTW to < v1.6 (#192)

FFTW v1.6 changes the type parameters of an r2rFFTWPlan, so this package breaks on that. This PR temporarily limits this package to older versions of FFTW while the issue is resolved.
---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 7a0a97e0..c61a93b6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.9"
+version = "0.14.10"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -18,7 +18,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "1.0"
-FFTW = "1"
+FFTW = "1 - 1.5"
 FastGaussQuadrature = "0.4, 0.5"
 FastTransforms_jll = "0.6.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"

From 9bfab48e2f3f5138de51dd1ff0e379dfd7a4276c Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Thu, 9 Mar 2023 22:23:00 +0530
Subject: [PATCH 128/222] Make ChebyshevTransformPlan compatible with FFTW v1.6
 (#193)

* r2rFFTWPlan compatible with FFTW v1.6

* Add tests

* Use explicit type in kindtuple

---------

Co-authored-by: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
---
 Project.toml              |   4 +-
 src/chebyshevtransform.jl |  86 ++++++++--------
 test/chebyshevtests.jl    | 206 ++++++++++++++++++++++++++------------
 3 files changed, 188 insertions(+), 108 deletions(-)

diff --git a/Project.toml b/Project.toml
index c61a93b6..0d57bd28 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.10"
+version = "0.14.11"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -18,7 +18,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "1.0"
-FFTW = "1 - 1.5"
+FFTW = "1.6"
 FastGaussQuadrature = "0.4, 0.5"
 FastTransforms_jll = "0.6.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 33f410ce..c77941d8 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -15,35 +15,35 @@ struct ChebyshevTransformPlan{T,kind,K,inplace,N,R} <: ChebyshevPlan{T}
     ChebyshevTransformPlan{T,kind,K,inplace,N,R}() where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}()
 end
 
-ChebyshevTransformPlan{T,kind,K}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
+ChebyshevTransformPlan{T,kind}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
     ChebyshevTransformPlan{T,kind,K,inplace,N,R}(plan)
 
 # jump through some hoops to make inferrable
-@inline kindtuple(KIND,N) = ntuple(_ -> KIND,N)
-@inline kindtuple(KIND,N,::Integer) = (KIND,)
+@inline kindtuple(N) = NTuple{N,Int32}
+@inline kindtuple(N,region...) = Vector{Int32}
 function plan_chebyshevtransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevTransformPlan{T,1,kindtuple(FIRSTKIND,N,dims...),true,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
+        ChebyshevTransformPlan{T,1,kindtuple(N,dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
-        ChebyshevTransformPlan{T,1,kindtuple(FIRSTKIND,N,dims...)}(FFTW.plan_r2r!(x, FIRSTKIND, dims...; kws...))
+        ChebyshevTransformPlan{T,1}(FFTW.plan_r2r!(x, FIRSTKIND, dims...; kws...))
     end
 end
 function plan_chebyshevtransform!(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
     any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
-    ChebyshevTransformPlan{T,2,kindtuple(SECONDKIND,N,dims...)}(FFTW.plan_r2r!(x, SECONDKIND, dims...; kws...))
+    ChebyshevTransformPlan{T,2}(FFTW.plan_r2r!(x, SECONDKIND, dims...; kws...))
 end
 
 
 function plan_chebyshevtransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevTransformPlan{T,1,kindtuple(FIRSTKIND,N,dims...),false,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
+        ChebyshevTransformPlan{T,1,kindtuple(N,dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
-        ChebyshevTransformPlan{T,1,kindtuple(FIRSTKIND,N,dims...)}(FFTW.plan_r2r(x, FIRSTKIND, dims...; kws...))
+        ChebyshevTransformPlan{T,1}(FFTW.plan_r2r(x, FIRSTKIND, dims...; kws...))
     end
 end
 function plan_chebyshevtransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
     any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
-    ChebyshevTransformPlan{T,2,kindtuple(SECONDKIND,N,dims...)}(FFTW.plan_r2r(x, SECONDKIND, dims...; kws...))
+    ChebyshevTransformPlan{T,2}(FFTW.plan_r2r(x, SECONDKIND, dims...; kws...))
 end
 
 plan_chebyshevtransform!(x::AbstractArray, dims...; kws...) = plan_chebyshevtransform!(x, Val(1), dims...; kws...)
@@ -143,7 +143,7 @@ function _prod_size(sz, d)
 end
 
 
-@inline function _cheb1_rescale!(d::UnitRange, y::AbstractArray)
+@inline function _cheb1_rescale!(d, y::AbstractArray)
     for k in d
         ldiv_dim_begin!(2, k, y)
     end
@@ -175,7 +175,7 @@ function _cheb2_rescale!(d::Number, y::AbstractArray)
 end
 
 # TODO: higher dimensional arrays
-function _cheb2_rescale!(d::UnitRange, y::AbstractArray)
+function _cheb2_rescale!(d, y::AbstractArray)
     for k in d
         ldiv_dim_begin!(2, k, y)
         ldiv_dim_end!(2, k, y)
@@ -229,18 +229,18 @@ struct IChebyshevTransformPlan{T,kind,K,inplace,N,R} <: ChebyshevPlan{T}
     IChebyshevTransformPlan{T,kind,K,inplace,N,R}() where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}()
 end
 
-IChebyshevTransformPlan{T,kind,K}(F::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
+IChebyshevTransformPlan{T,kind}(F::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
     IChebyshevTransformPlan{T,kind,K,inplace,N,R}(F)
 
 
 
 # second kind Chebyshev transforms share a plan with their inverse
 # so we support this via inv
-inv(P::ChebyshevTransformPlan{T,2,K}) where {T,K} = IChebyshevTransformPlan{T,2,K}(P.plan)
-inv(P::IChebyshevTransformPlan{T,2,K}) where {T,K} = ChebyshevTransformPlan{T,2,K}(P.plan)
+inv(P::ChebyshevTransformPlan{T,2}) where {T} = IChebyshevTransformPlan{T,2}(P.plan)
+inv(P::IChebyshevTransformPlan{T,2}) where {T} = ChebyshevTransformPlan{T,2}(P.plan)
 
-inv(P::ChebyshevTransformPlan{T,1,K,inplace,N}) where {T,K,inplace,N} = IChebyshevTransformPlan{T,1,kindtuple(IFIRSTKIND,N,P.plan.region...)}(inv(P.plan).p)
-inv(P::IChebyshevTransformPlan{T,1,K,inplace,N}) where {T,K,inplace,N} = ChebyshevTransformPlan{T,1,kindtuple(FIRSTKIND,N,P.plan.region...)}(inv(P.plan).p)
+inv(P::ChebyshevTransformPlan{T,1}) where {T} = IChebyshevTransformPlan{T,1}(inv(P.plan).p)
+inv(P::IChebyshevTransformPlan{T,1}) where {T} = ChebyshevTransformPlan{T,1}(inv(P.plan).p)
 
 
 
@@ -250,9 +250,9 @@ inv(P::IChebyshevTransformPlan{T,1,K,inplace,N}) where {T,K,inplace,N} = Chebysh
 
 function plan_ichebyshevtransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevTransformPlan{T,1,kindtuple(IFIRSTKIND,N,dims...),true,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
+        IChebyshevTransformPlan{T,1,kindtuple(N,dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
-        IChebyshevTransformPlan{T,1,kindtuple(IFIRSTKIND,N,dims...)}(FFTW.plan_r2r!(x, IFIRSTKIND, dims...; kws...))
+        IChebyshevTransformPlan{T,1}(FFTW.plan_r2r!(x, IFIRSTKIND, dims...; kws...))
     end
 end
 
@@ -262,9 +262,9 @@ end
 
 function plan_ichebyshevtransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevTransformPlan{T,1,kindtuple(IFIRSTKIND,N,dims...),false,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
+        IChebyshevTransformPlan{T,1,kindtuple(N,dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
-        IChebyshevTransformPlan{T,1,kindtuple(IFIRSTKIND,N,dims...)}(FFTW.plan_r2r(x, IFIRSTKIND, dims...; kws...))
+        IChebyshevTransformPlan{T,1}(FFTW.plan_r2r(x, IFIRSTKIND, dims...; kws...))
     end
 end
 
@@ -279,7 +279,7 @@ plan_ichebyshevtransform(x::AbstractArray, dims...; kws...) = plan_ichebyshevtra
     lmul_dim_begin!(2, d, x)
     x
 end
-@inline function _icheb1_prescale!(d::UnitRange, x::AbstractArray)
+@inline function _icheb1_prescale!(d, x::AbstractArray)
     for k in d
         _icheb1_prescale!(k, x)
     end
@@ -290,7 +290,7 @@ end
     x
 end
 
-@inline function _icheb1_postscale!(d::UnitRange, x::AbstractArray)
+@inline function _icheb1_postscale!(d, x::AbstractArray)
     for k in d
         _icheb1_postscale!(k, x)
     end
@@ -321,7 +321,7 @@ end
     lmul_dim_end!(2, d, x)
     x
 end
-@inline function _icheb2_prescale!(d::UnitRange, x::AbstractArray)
+@inline function _icheb2_prescale!(d, x::AbstractArray)
     for k in d
         _icheb2_prescale!(k, x)
     end
@@ -333,7 +333,7 @@ end
     ldiv_dim_end!(2, d, x)
     x
 end
-@inline function _icheb2_postrescale!(d::UnitRange, x::AbstractArray)
+@inline function _icheb2_postrescale!(d, x::AbstractArray)
     for k in d
         _icheb2_postrescale!(k, x)
     end
@@ -344,7 +344,7 @@ end
     lmul!(convert(T, size(y,d) - 1)/2, y)
     y
 end
-@inline function _icheb2_rescale!(d::UnitRange, y::AbstractArray{T}) where T
+@inline function _icheb2_rescale!(d, y::AbstractArray{T}) where T
     _icheb2_prescale!(d, y)
     lmul!(_prod_size(convert.(T, size(y) .- 1)./2, d), y)
     y
@@ -384,32 +384,32 @@ struct ChebyshevUTransformPlan{T,kind,K,inplace,N,R} <: ChebyshevPlan{T}
     ChebyshevUTransformPlan{T,kind,K,inplace,N,R}() where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}()
 end
 
-ChebyshevUTransformPlan{T,kind,K}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
+ChebyshevUTransformPlan{T,kind}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
     ChebyshevUTransformPlan{T,kind,K,inplace,N,R}(plan)
 
 
 function plan_chebyshevutransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevUTransformPlan{T,1,kindtuple(UFIRSTKIND,N,dims...),true,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
+        ChebyshevUTransformPlan{T,1,kindtuple(N,dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
-        ChebyshevUTransformPlan{T,1,kindtuple(UFIRSTKIND,N,dims...)}(FFTW.plan_r2r!(x, UFIRSTKIND, dims...; kws...))
+        ChebyshevUTransformPlan{T,1}(FFTW.plan_r2r!(x, UFIRSTKIND, dims...; kws...))
     end
 end
 function plan_chebyshevutransform!(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
     any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
-    ChebyshevUTransformPlan{T,2,kindtuple(USECONDKIND,N,dims...)}(FFTW.plan_r2r!(x, USECONDKIND, dims...; kws...))
+    ChebyshevUTransformPlan{T,2}(FFTW.plan_r2r!(x, USECONDKIND, dims...; kws...))
 end
 
 function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevUTransformPlan{T,1,kindtuple(UFIRSTKIND,N,dims...),false,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
+        ChebyshevUTransformPlan{T,1,kindtuple(N,dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
-        ChebyshevUTransformPlan{T,1,kindtuple(UFIRSTKIND,N,dims...)}(FFTW.plan_r2r(x, UFIRSTKIND, dims...; kws...))
+        ChebyshevUTransformPlan{T,1}(FFTW.plan_r2r(x, UFIRSTKIND, dims...; kws...))
     end
 end
 function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
     any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
-    ChebyshevUTransformPlan{T,2,kindtuple(USECONDKIND,N,dims...)}(FFTW.plan_r2r(x, USECONDKIND, dims...; kws...))
+    ChebyshevUTransformPlan{T,2}(FFTW.plan_r2r(x, USECONDKIND, dims...; kws...))
 end
 
 plan_chebyshevutransform!(x::AbstractArray, dims...; kws...) = plan_chebyshevutransform!(x, Val(1), dims...; kws...)
@@ -506,31 +506,31 @@ struct IChebyshevUTransformPlan{T,kind,K,inplace,N,R} <: ChebyshevPlan{T}
     IChebyshevUTransformPlan{T,kind,K,inplace,N,R}() where {T,kind,K,inplace,N,R} = new{T,kind,K,inplace,N,R}()
 end
 
-IChebyshevUTransformPlan{T,kind,K}(F::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
+IChebyshevUTransformPlan{T,kind}(F::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,kind,K,inplace,N,R} =
     IChebyshevUTransformPlan{T,kind,K,inplace,N,R}(F)
 
 function plan_ichebyshevutransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevUTransformPlan{T,1,kindtuple(IUFIRSTKIND,N,dims...),true,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
+        IChebyshevUTransformPlan{T,1,kindtuple(N,dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
-        IChebyshevUTransformPlan{T,1,kindtuple(IUFIRSTKIND,N,dims...)}(FFTW.plan_r2r!(x, IUFIRSTKIND, dims...; kws...))
+        IChebyshevUTransformPlan{T,1}(FFTW.plan_r2r!(x, IUFIRSTKIND, dims...; kws...))
     end
 end
 function plan_ichebyshevutransform!(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
     any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
-    IChebyshevUTransformPlan{T,2,kindtuple(USECONDKIND,N,dims...)}(FFTW.plan_r2r!(x, USECONDKIND))
+    IChebyshevUTransformPlan{T,2}(FFTW.plan_r2r!(x, USECONDKIND))
 end
 
 function plan_ichebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevUTransformPlan{T,1,kindtuple(IUFIRSTKIND,N,dims...),false,N,isempty(dims) ? UnitRange{Int} : typeof(dims)}()
+        IChebyshevUTransformPlan{T,1,kindtuple(N,dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
-        IChebyshevUTransformPlan{T,1,kindtuple(IUFIRSTKIND,N,dims...)}(FFTW.plan_r2r(x, IUFIRSTKIND, dims...; kws...))
+        IChebyshevUTransformPlan{T,1}(FFTW.plan_r2r(x, IUFIRSTKIND, dims...; kws...))
     end
 end
 function plan_ichebyshevutransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
     any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
-    IChebyshevUTransformPlan{T,2,kindtuple(USECONDKIND,N,dims...)}(FFTW.plan_r2r(x, USECONDKIND, dims...; kws...))
+    IChebyshevUTransformPlan{T,2}(FFTW.plan_r2r(x, USECONDKIND, dims...; kws...))
 end
 
 
@@ -539,11 +539,11 @@ plan_ichebyshevutransform(x::AbstractArray, dims...; kws...) = plan_ichebyshevut
 
 # second kind Chebyshev transforms share a plan with their inverse
 # so we support this via inv
-inv(P::ChebyshevUTransformPlan{T,2,K}) where {T,K} = IChebyshevUTransformPlan{T,2,K}(P.plan)
-inv(P::IChebyshevUTransformPlan{T,2,K}) where {T,K} = ChebyshevUTransformPlan{T,2,K}(P.plan)
+inv(P::ChebyshevUTransformPlan{T,2}) where {T} = IChebyshevUTransformPlan{T,2}(P.plan)
+inv(P::IChebyshevUTransformPlan{T,2}) where {T} = ChebyshevUTransformPlan{T,2}(P.plan)
 
-inv(P::ChebyshevUTransformPlan{T,1,K,inplace,N}) where {T,K,inplace,N} = IChebyshevUTransformPlan{T,1,kindtuple(IUFIRSTKIND,N,P.plan.region...)}(inv(P.plan).p)
-inv(P::IChebyshevUTransformPlan{T,1,K,inplace,N}) where {T,K,inplace,N} = ChebyshevUTransformPlan{T,1,kindtuple(UFIRSTKIND,N,P.plan.region...)}(inv(P.plan).p)
+inv(P::ChebyshevUTransformPlan{T,1}) where {T} = IChebyshevUTransformPlan{T,1}(inv(P.plan).p)
+inv(P::IChebyshevUTransformPlan{T,1}) where {T} = ChebyshevUTransformPlan{T,1}(inv(P.plan).p)
 
 
 function _ichebyu1_postscale!(_, x::AbstractVector{T}) where T
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 58a93481..ae438ac0 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -23,30 +23,50 @@ using FastTransforms, Test
             n = 20
             p_1 = chebyshevpoints(T, n)
             f = exp.(p_1)
-            f̌ = @inferred(chebyshevtransform(f))
-            @test f̌ == chebyshevtransform!(copy(f))
+            g = @inferred(chebyshevtransform(f))
+            @test g == chebyshevtransform!(copy(f))
 
-            f̃ = x -> [cos(k*acos(x)) for k=0:n-1]' * f̌
+            f̃ = x -> [cos(k*acos(x)) for k=0:n-1]' * g
             @test f̃(0.1) ≈ exp(T(0.1))
-            @test @inferred(ichebyshevtransform(f̌)) ≈ ichebyshevtransform!(copy(f̌)) ≈ exp.(p_1)
+            @test @inferred(ichebyshevtransform(g)) ≈ ichebyshevtransform!(copy(g)) ≈ exp.(p_1)
 
-            f̃ = copy(f)
-            f̄ = copy(f̌)
+            fcopy = copy(f)
+            gcopy = copy(g)
             P = @inferred(plan_chebyshevtransform(f))
-            @test @inferred(P*f) == f̌
-            @test f == f̃
+            @test @inferred(P*f) == g
+            @test f == fcopy
             @test_throws ArgumentError P * T[1,2]
+            P2 = @inferred(plan_chebyshevtransform(f, Val(1), 1:1))
+            @test @inferred(P2*f) == g
+            @test_throws ArgumentError P * T[1,2]
+
             P = @inferred(plan_chebyshevtransform!(f))
-            @test @inferred(P*f) == f̌
-            @test f == f̌
+            @test @inferred(P*f) == g
+            @test f == g
             @test_throws ArgumentError P * T[1,2]
-            Pi = @inferred(plan_ichebyshevtransform(f̌))
-            @test @inferred(Pi*f̌) ≈ f̃
-            @test f̌ == f̄
+            f .= fcopy
+            P2 = @inferred(plan_chebyshevtransform!(f, 1:1))
+            @test @inferred(P2*f) == g
+            @test f == g
+            @test_throws ArgumentError P * T[1,2]
+
+            Pi = @inferred(plan_ichebyshevtransform(g))
+            @test @inferred(Pi*g) ≈ fcopy
+            @test g == gcopy
             @test_throws ArgumentError Pi * T[1,2]
-            Pi = @inferred(plan_ichebyshevtransform!(f̌))
-            @test @inferred(Pi*f̌) ≈ f̃
-            @test f̌ ≈ f̃
+            Pi2 = @inferred(plan_ichebyshevtransform(g, 1:1))
+            @test @inferred(Pi2*g) ≈ fcopy
+            @test g == gcopy
+            @test_throws ArgumentError Pi * T[1,2]
+
+            Pi = @inferred(plan_ichebyshevtransform!(g))
+            @test @inferred(Pi*g) ≈ fcopy
+            @test g ≈ fcopy
+            g .= gcopy
+            @test_throws ArgumentError Pi * T[1,2]
+            Pi2 = @inferred(plan_ichebyshevtransform!(g, 1:1))
+            @test @inferred(Pi2*g) ≈ fcopy
+            @test g ≈ fcopy
             @test_throws ArgumentError Pi * T[1,2]
 
             @test chebyshevtransform(T[1]) == T[1]
@@ -60,36 +80,58 @@ using FastTransforms, Test
             n = 20
             p_2 = chebyshevpoints(T, n, Val(2))
             f = exp.(p_2)
-            f̌ = @inferred(chebyshevtransform(f, Val(2)))
-            @test f̌ == chebyshevtransform!(copy(f), Val(2))
+            g = @inferred(chebyshevtransform(f, Val(2)))
+            @test g == chebyshevtransform!(copy(f), Val(2))
 
-            f̃ = x -> [cos(k*acos(x)) for k=0:n-1]' * f̌
+            f̃ = x -> [cos(k*acos(x)) for k=0:n-1]' * g
             @test f̃(0.1) ≈ exp(T(0.1))
-            @test @inferred(ichebyshevtransform(f̌, Val(2))) ≈ ichebyshevtransform!(copy(f̌), Val(2)) ≈ exp.(p_2)
+            @test @inferred(ichebyshevtransform(g, Val(2))) ≈ ichebyshevtransform!(copy(g), Val(2)) ≈ exp.(p_2)
 
             P = @inferred(plan_chebyshevtransform!(f, Val(2)))
             Pi = @inferred(plan_ichebyshevtransform!(f, Val(2)))
             @test all(@inferred(P \ copy(f)) .=== Pi * copy(f))
-            @test all(@inferred(Pi \ copy(f̌)) .=== P * copy(f̌))
+            @test all(@inferred(Pi \ copy(g)) .=== P * copy(g))
             @test f ≈ P \ (P*copy(f)) ≈ P * (P\copy(f)) ≈ Pi \ (Pi*copy(f)) ≈ Pi * (Pi \ copy(f))
 
-            f̃ = copy(f)
-            f̄ = copy(f̌)
+            fcopy = copy(f)
+            gcopy = copy(g)
+
             P = @inferred(plan_chebyshevtransform(f, Val(2)))
+            @test P*f == g
+            @test f == fcopy
             @test_throws ArgumentError P * T[1,2]
-            @test P*f == f̌
-            @test f == f̃
+            P = @inferred(plan_chebyshevtransform(f, Val(2), 1:1))
+            @test P*f == g
+            @test f == fcopy
+            @test_throws ArgumentError P * T[1,2]
+
             P = @inferred(plan_chebyshevtransform!(f, Val(2)))
-            @test P*f == f̌
-            @test f == f̌
+            @test P*f == g
+            @test f == g
             @test_throws ArgumentError P * T[1,2]
-            Pi = @inferred(plan_ichebyshevtransform(f̌, Val(2)))
-            @test Pi*f̌ ≈ f̃
-            @test f̌ == f̄
+            f .= fcopy
+            P = @inferred(plan_chebyshevtransform!(f, Val(2), 1:1))
+            @test P*f == g
+            @test f == g
+            @test_throws ArgumentError P * T[1,2]
+
+            Pi = @inferred(plan_ichebyshevtransform(g, Val(2)))
+            @test Pi*g ≈ fcopy
+            @test g == gcopy
+            @test_throws ArgumentError Pi * T[1,2]
+            Pi = @inferred(plan_ichebyshevtransform(g, Val(2), 1:1))
+            @test Pi*g ≈ fcopy
+            @test g == gcopy
+            @test_throws ArgumentError Pi * T[1,2]
+
+            Pi = @inferred(plan_ichebyshevtransform!(g, Val(2)))
+            @test Pi*g ≈ fcopy
+            @test g ≈ fcopy
             @test_throws ArgumentError Pi * T[1,2]
-            Pi = @inferred(plan_ichebyshevtransform!(f̌, Val(2)))
-            @test Pi*f̌ ≈ f̃
-            @test f̌ ≈ f̃
+            g .= gcopy
+            Pi = @inferred(plan_ichebyshevtransform!(g, Val(2), 1:1))
+            @test Pi*g ≈ fcopy
+            @test g ≈ fcopy
             @test_throws ArgumentError Pi * T[1,2]
 
             @test_throws ArgumentError chebyshevtransform(T[1], Val(2))
@@ -104,29 +146,50 @@ using FastTransforms, Test
             n = 20
             p_1 = chebyshevpoints(T, n)
             f = exp.(p_1)
-            f̌ = @inferred(chebyshevutransform(f))
+            g = @inferred(chebyshevutransform(f))
 
-            f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-1]' * f̌
+            f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-1]' * g
             @test f̃(0.1) ≈ exp(T(0.1))
-            @test ichebyshevutransform(f̌) ≈ exp.(p_1)
+            @test ichebyshevutransform(g) ≈ exp.(p_1)
 
-            f̃ = copy(f)
-            f̄ = copy(f̌)
+            fcopy = copy(f)
+            gcopy = copy(g)
             P = @inferred(plan_chebyshevutransform(f))
-            @test P*f ≈ f̌
-            @test f == f̃
+            @test P*f ≈ g
+            @test f == fcopy
             @test_throws ArgumentError P * T[1,2]
+            P = @inferred(plan_chebyshevutransform(f, 1:1))
+            @test P*f ≈ g
+            @test f == fcopy
+            @test_throws ArgumentError P * T[1,2]
+
+            P = @inferred(plan_chebyshevutransform!(f))
+            @test P*f ≈ g
+            @test f ≈ g
+            @test_throws ArgumentError P * T[1,2]
+            f .= fcopy
             P = @inferred(plan_chebyshevutransform!(f))
-            @test P*f ≈ f̌
-            @test f ≈ f̌
+            @test P*f ≈ g
+            @test f ≈ g
             @test_throws ArgumentError P * T[1,2]
-            Pi = @inferred(plan_ichebyshevutransform(f̌))
-            @test Pi*f̌ ≈ f̃
-            @test f̌ == f̄
+
+            Pi = @inferred(plan_ichebyshevutransform(g))
+            @test Pi*g ≈ fcopy
+            @test g == gcopy
+            @test_throws ArgumentError Pi * T[1,2]
+            Pi = @inferred(plan_ichebyshevutransform(g, 1:1))
+            @test Pi*g ≈ fcopy
+            @test g == gcopy
+            @test_throws ArgumentError Pi * T[1,2]
+
+            Pi = @inferred(plan_ichebyshevutransform!(g))
+            @test Pi*g ≈ fcopy
+            @test g ≈ fcopy
             @test_throws ArgumentError Pi * T[1,2]
-            Pi = @inferred(plan_ichebyshevutransform!(f̌))
-            @test Pi*f̌ ≈ f̃
-            @test f̌ ≈ f̃
+            g .= gcopy
+            Pi = @inferred(plan_ichebyshevutransform!(g))
+            @test Pi*g ≈ fcopy
+            @test g ≈ fcopy
             @test_throws ArgumentError Pi * T[1,2]
 
             @test chebyshevutransform(T[1]) == T[1]
@@ -140,29 +203,46 @@ using FastTransforms, Test
             n = 20
             p_2 = chebyshevpoints(T, n, Val(2))[2:end-1]
             f = exp.(p_2)
-            f̌ = @inferred(chebyshevutransform(f, Val(2)))
+            g = @inferred(chebyshevutransform(f, Val(2)))
 
-            f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-3]' * f̌
+            f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-3]' * g
             @test f̃(0.1) ≈ exp(T(0.1))
-            @test @inferred(ichebyshevutransform(f̌, Val(2))) ≈ exp.(p_2)
+            @test @inferred(ichebyshevutransform(g, Val(2))) ≈ exp.(p_2)
 
-            f̃ = copy(f)
-            f̄ = copy(f̌)
+            fcopy = copy(f)
+            gcopy = copy(g)
             P = @inferred(plan_chebyshevutransform(f, Val(2)))
-            @test @inferred(P*f) ≈ f̌
-            @test f ≈ f̃
+            @test @inferred(P*f) ≈ g
+            @test f ≈ fcopy
+            @test_throws ArgumentError P * T[1,2]
+            P = @inferred(plan_chebyshevutransform(f, Val(2), 1:1))
+            @test @inferred(P*f) ≈ g
+            @test f ≈ fcopy
             @test_throws ArgumentError P * T[1,2]
+
             P = @inferred(plan_chebyshevutransform!(f, Val(2)))
-            @test @inferred(P*f) ≈ f̌
-            @test f ≈ f̌
+            @test @inferred(P*f) ≈ g
+            @test f ≈ g
+            @test_throws ArgumentError P * T[1,2]
+            f .= fcopy
+            P = @inferred(plan_chebyshevutransform!(f, Val(2), 1:1))
+            @test @inferred(P*f) ≈ g
+            @test f ≈ g
             @test_throws ArgumentError P * T[1,2]
-            Pi = @inferred(plan_ichebyshevutransform(f̌, Val(2)))
-            @test @inferred(Pi*f̌) ≈ f̃
-            @test f̌ ≈ f̄
+
+            Pi = @inferred(plan_ichebyshevutransform(g, Val(2)))
+            @test @inferred(Pi*g) ≈ fcopy
+            @test g ≈ gcopy
+            @test_throws ArgumentError Pi * T[1,2]
+
+            Pi = @inferred(plan_ichebyshevutransform!(g, Val(2)))
+            @test @inferred(Pi*g) ≈ fcopy
+            @test g ≈ fcopy
             @test_throws ArgumentError Pi * T[1,2]
-            Pi = @inferred(plan_ichebyshevutransform!(f̌, Val(2)))
-            @test @inferred(Pi*f̌) ≈ f̃
-            @test f̌ ≈ f̃
+            g .= gcopy
+            Pi = @inferred(plan_ichebyshevutransform!(g, Val(2)))
+            @test @inferred(Pi*g) ≈ fcopy
+            @test g ≈ fcopy
             @test_throws ArgumentError Pi * T[1,2]
 
             @test_throws ArgumentError chebyshevutransform(T[1], Val(2))

From 4ee9e2b6780b42c9b2a80e061d778acd0bf2109b Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 14 Mar 2023 16:51:45 +0000
Subject: [PATCH 129/222] CompatHelper: bump compat for ToeplitzMatrices to
 0.8, (keep existing compat) (#196)

Co-authored-by: CompatHelper Julia <compathelper_noreply@julialang.org>
---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 0d57bd28..2daedec3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -25,5 +25,5 @@ FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
 GenericFFT = "0.1"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
-ToeplitzMatrices = "0.7.1"
+ToeplitzMatrices = "0.7.1, 0.8"
 julia = "1.7"

From b6a1a35c628546768273af7432dc38682a98da2d Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Tue, 14 Mar 2023 21:11:50 +0000
Subject: [PATCH 130/222] Julia v1.9 (#197)

* Julia v1.9

* v0.14.12
---
 .github/workflows/ci.yml | 1 +
 Project.toml             | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bd93db0d..7c0aba73 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,6 +11,7 @@ jobs:
         version:
           - '1.7'
           - '1'
+          - '^1.9.0-0'
         os:
           - ubuntu-latest
           - macOS-latest
diff --git a/Project.toml b/Project.toml
index 2daedec3..eaed0937 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.11"
+version = "0.14.12"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From 4cc63916bbc6681900006b9a045b450ae38b2cf2 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Sat, 25 Mar 2023 14:25:33 +0000
Subject: [PATCH 131/222] Restore Toeplitz-dot-Hankel (#198)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Restore ToeplitzHankel

* Restore TH

* Toeplitz Plans

* Update toeplitzplans.jl

* add ToeplitzPlans tests

* Use ToeplitzPlan

* Make planned transforms allocation-free

* Start removing Hankel

* Remove usage of Hankel

* Start making plans multidim

* fix vector transforms

* add tests for ultra2ultra and jac2jac (the latter of which fails)

* 2D leg2cheb on one dimension

* work on multidims

* Make C a matrix

* use 2D FFT for Leg2Cheb

* Combine DL, DR and C

* Update transforms

* dims=1 leg2cheb

* dims=2 works

* 2D leg2cheb

* Simplify setup

* work on cheb2leg

* some cheb2leg 2D transforms work

* 2d cheb2leg works

* need to speed-up maybereal!

* simplify cheb2leg

* Add complex tests

* Remove unused code

* Default to toeplitz-dot-hankel for one-off transforms to avoid expensive plans

* add plans with range dims

* unify code

* Update toeplitzhankel.jl

* add BigFloat tests

* BigFloat tests/bug fixes

* Fix Λ sign for bigfloats

* Fix Λ definition

* support empty case
---
 Project.toml                   |   2 +-
 src/FastTransforms.jl          |  21 +++
 src/libfasttransforms.jl       |   3 +-
 src/specialfunctions.jl        |  15 +-
 src/toeplitzhankel.jl          | 287 +++++++++++++++++++++++++++++++++
 src/toeplitzplans.jl           | 223 +++++++++++++++++++++++++
 test/libfasttransformstests.jl |   2 +-
 test/runtests.jl               |   3 +-
 test/specialfunctionstests.jl  |   8 +-
 test/toeplitzhankeltests.jl    |  40 +++++
 test/toeplitzplanstests.jl     |  63 ++++++++
 test/toeplitztests.jl          |  26 ---
 12 files changed, 658 insertions(+), 35 deletions(-)
 create mode 100644 src/toeplitzhankel.jl
 create mode 100644 src/toeplitzplans.jl
 create mode 100644 test/toeplitzhankeltests.jl
 create mode 100644 test/toeplitzplanstests.jl
 delete mode 100644 test/toeplitztests.jl

diff --git a/Project.toml b/Project.toml
index eaed0937..8ba51b09 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.14.12"
+version = "0.15"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index b7442ec5..dcf15d7e 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -109,4 +109,25 @@ export sphones, sphzeros, sphrand, sphrandn, sphevaluate,
 
 include("specialfunctions.jl")
 
+include("toeplitzplans.jl")
+include("toeplitzhankel.jl")
+
+# following use libfasttransforms by default
+for f in (:jac2jac,
+    :lag2lag, :jac2ultra, :ultra2jac, :jac2cheb,
+    :cheb2jac, :ultra2cheb, :cheb2ultra, :associatedjac2jac,
+    :modifiedjac2jac, :modifiedlag2lag, :modifiedherm2herm,
+    :sph2fourier, :sphv2fourier, :disk2cxf, :ann2cxf,
+    :rectdisk2cheb, :tri2cheb, :tet2cheb)
+    lib_f = Symbol("lib_", f)
+    @eval $f(x::AbstractArray, y...; z...) = $lib_f(x::AbstractArray, y...; z...)
+end
+
+# following use Toeplitz-Hankel to avoid expensive plans
+for f in (:leg2cheb, :cheb2leg, :ultra2ultra)
+    th_f = Symbol("th_", f)
+    @eval $f(x::AbstractArray, y...; z...) = $th_f(x::AbstractArray, y...; z...)
+end
+
+
 end # module
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index dd42b4ce..c1856cbf 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -435,10 +435,11 @@ for f in (:leg2cheb, :cheb2leg, :ultra2ultra, :jac2jac,
           :sph2fourier, :sphv2fourier, :disk2cxf, :ann2cxf,
           :rectdisk2cheb, :tri2cheb, :tet2cheb)
     plan_f = Symbol("plan_", f)
+    lib_f = Symbol("lib_", f)
     @eval begin
         $plan_f(x::AbstractArray{T}, y...; z...) where T = $plan_f(T, size(x, 1), y...; z...)
         $plan_f(::Type{Complex{T}}, y...; z...) where T <: Real = $plan_f(T, y...; z...)
-        $f(x::AbstractArray, y...; z...) = $plan_f(x, y...; z...)*x
+        $lib_f(x::AbstractArray, y...; z...) = $plan_f(x, y...; z...)*x
     end
 end
 
diff --git a/src/specialfunctions.jl b/src/specialfunctions.jl
index 62b47b16..66ef53c2 100644
--- a/src/specialfunctions.jl
+++ b/src/specialfunctions.jl
@@ -135,7 +135,8 @@ end
 """
 The Lambda function ``\\Lambda(z) = \\frac{\\Gamma(z+\\frac{1}{2})}{\\Gamma(z+1)}`` for the ratio of gamma functions.
 """
-Λ(z::Number) = exp(lgamma(z+half(z))-lgamma(z+one(z)))
+Λ(z::Number) = Λ(z, half(z), one(z))
+
 """
 For 64-bit floating-point arithmetic, the Lambda function uses the asymptotic series for ``\\tau`` in Appendix B of
 
@@ -153,12 +154,18 @@ end
 """
 The Lambda function ``\\Lambda(z,λ₁,λ₂) = \\frac{\\Gamma(z+\\lambda_1)}{Γ(z+\\lambda_2)}`` for the ratio of gamma functions.
 """
-Λ(z::Number,λ₁::Number,λ₂::Number) = exp(lgamma(z+λ₁)-lgamma(z+λ₂))
-function Λ(x::Float64,λ₁::Float64,λ₂::Float64)
+function Λ(z::Real, λ₁::Real, λ₂::Real)
+    if z+λ₁ > 0 && z+λ₂ > 0
+        exp(lgamma(z+λ₁)-lgamma(z+λ₂))
+    else
+        gamma(z+λ₁)/gamma(z+λ₂)
+    end
+end
+function Λ(x::Float64, λ₁::Float64, λ₂::Float64)
     if min(x+λ₁,x+λ₂) ≥ 8.979120323411497
         exp(λ₂-λ₁+(x-.5)*log1p((λ₁-λ₂)/(x+λ₂)))*(x+λ₁)^λ₁/(x+λ₂)^λ₂*stirlingseries(x+λ₁)/stirlingseries(x+λ₂)
     else
-        (x+λ₂)/(x+λ₁)*Λ(x+1.,λ₁,λ₂)
+        (x+λ₂)/(x+λ₁)*Λ(x + 1.0, λ₁, λ₂)
     end
 end
 
diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
new file mode 100644
index 00000000..a5c18572
--- /dev/null
+++ b/src/toeplitzhankel.jl
@@ -0,0 +1,287 @@
+"""
+Store a diagonally-scaled Toeplitz∘Hankel matrix:
+    DL(T∘H)DR
+where the Hankel matrix `H` is non-negative definite. This allows a Cholesky decomposition in 𝒪(K²N) operations and 𝒪(KN) storage, K = log N log ɛ⁻¹.
+"""
+struct ToeplitzHankelPlan{S, N, M, N1, TP<:ToeplitzPlan{S,N1}} <: Plan{S}
+    T::NTuple{M,TP}
+    L::NTuple{M,Matrix{S}}
+    R::NTuple{M,Matrix{S}}
+    tmp::Array{S,N1}
+    dims::NTuple{M,Int}
+    function ToeplitzHankelPlan{S,N,M,N1,TP}(T::NTuple{M,TP}, L, R, dims) where {S,TP,N,N1,M}
+        tmp = Array{S}(undef, max.(size.(T)...)...)
+        new{S,N,M,N1,TP}(T, L, R, tmp, dims)
+    end
+    ToeplitzHankelPlan{S,N,M,N1,TP}(T::NTuple{M,TP}, L, R, dims::Int) where {S,TP,N,N1,M} =
+        ToeplitzHankelPlan{S,N,M,N1,TP}(T, L, R, (dims,))
+end
+
+ToeplitzHankelPlan(T::ToeplitzPlan{S,2}, L::Matrix, R::Matrix, dims=1) where S =
+    ToeplitzHankelPlan{S, 1, 1, 2, typeof(T)}((T,), (L,), (R,), dims)
+
+ToeplitzHankelPlan(T::ToeplitzPlan{S,3}, L::Matrix, R::Matrix, dims) where S =
+    ToeplitzHankelPlan{S, 2, 1,3, typeof(T)}((T,), (L,), (R,), dims)
+
+ToeplitzHankelPlan(T::NTuple{2,TP}, L::Tuple, R::Tuple, dims) where {S,TP<:ToeplitzPlan{S,3}} =
+    ToeplitzHankelPlan{S, 2,2,3, TP}(T, L, R, dims)
+
+
+function *(P::ToeplitzHankelPlan{<:Any,1}, v::AbstractVector)
+    (R,),(L,),(T,),tmp = P.R,P.L,P.T,P.tmp
+    tmp .= R .* v
+    T * tmp
+    tmp .= L .* tmp
+    sum!(v, tmp)
+end
+
+function _th_applymul1!(v, T, L, R, tmp)
+    N = size(R,2)
+    m,n = size(v)
+    tmp[1:m,1:n,1:N] .=  reshape(R,size(R,1),1,N) .* v
+    T * view(tmp,1:m,1:n,1:N)
+    view(tmp,1:m,1:n,1:N) .*=  reshape(L,size(L,1),1,N)
+    sum!(v, view(tmp,1:m,1:n,1:N))
+end
+
+function _th_applymul2!(v, T, L, R, tmp)
+    N = size(R,2)
+    m,n = size(v)
+    tmp[1:m,1:n,1:N] .=  reshape(R,1,size(R,1),N) .* v
+    T * view(tmp,1:m,1:n,1:N)
+    view(tmp,1:m,1:n,1:N) .*=  reshape(L,1,size(L,1),N)
+    sum!(v, view(tmp,1:m,1:n,1:N))
+end
+
+
+function *(P::ToeplitzHankelPlan{<:Any,2,1}, v::AbstractMatrix)
+    (R,),(L,),(T,),tmp = P.R,P.L,P.T,P.tmp
+    if P.dims == (1,)
+        _th_applymul1!(v, T, L, R, tmp)
+    else
+        _th_applymul2!(v, T, L, R, tmp)
+    end
+    v
+end
+
+function *(P::ToeplitzHankelPlan{<:Any,2,2}, v::AbstractMatrix)
+    (R1,R2),(L1,L2),(T1,T2),tmp = P.R,P.L,P.T,P.tmp
+
+    _th_applymul1!(v, T1, L1, R1, tmp)
+    _th_applymul2!(v, T2, L2, R2, tmp)
+
+    v
+end
+
+# partial cholesky for a Hankel matrix
+
+function hankel_partialchol(v::Vector{T}) where T
+    # Assumes positive definite
+    σ = T[]
+    n = isempty(v) ? 0 : (length(v)+2) ÷ 2
+    C = Matrix{T}(undef, n, n)
+    d = v[1:2:end] # diag of H
+    @assert length(v) ≥ 2n-1
+    reltol = maximum(abs,d)*eps(T)*log(n)
+    r = 0
+    for k = 1:n
+        mx,idx = findmax(d)
+        if mx ≤ reltol break end
+        push!(σ, inv(mx))
+        C[:,k] .= view(v,idx:n+idx-1)
+        for j = 1:k-1
+            nCjidxσj = -C[idx,j]*σ[j]
+            LinearAlgebra.axpy!(nCjidxσj, view(C,:,j), view(C,:,k))
+        end
+        @inbounds for p=1:n
+            d[p] -= C[p,k]^2/mx
+        end
+        r += 1
+    end
+    for k=1:length(σ) rmul!(view(C,:,k), sqrt(σ[k])) end
+    C[:,1:r]
+end
+
+
+# Diagonally-scaled Toeplitz∘Hankel polynomial transforms
+
+
+
+struct ChebyshevToLegendrePlanTH{TH}
+    toeplitzhankel::TH
+end
+
+function *(P::ChebyshevToLegendrePlanTH, v::AbstractVector{S}) where S
+    n = length(v)
+    ret = zero(S)
+    @inbounds for k = 1:2:n
+        ret += -v[k]/(k*(k-2))
+    end
+    v[1] = ret
+    P.toeplitzhankel*view(v,2:n)
+    v
+end
+
+function _cheb2leg_rescale1!(V::AbstractMatrix{S}) where S
+    m,n = size(V)
+    for j = 1:n
+        ret = zero(S)
+        @inbounds for k = 1:2:m
+            ret += -V[k,j]/(k*(k-2))
+        end
+        V[1,j] = ret
+    end
+    V
+end
+
+
+function *(P::ChebyshevToLegendrePlanTH, V::AbstractMatrix)
+    m,n = size(V)
+    dims = P.toeplitzhankel.dims
+    if dims == (1,)
+        _cheb2leg_rescale1!(V)
+        P.toeplitzhankel*view(V,2:m,:)
+    elseif dims == (2,)
+        _cheb2leg_rescale1!(transpose(V))
+        P.toeplitzhankel*view(V,:,2:n)
+    else
+        @assert dims == (1,2)
+        (R1,R2),(L1,L2),(T1,T2),tmp = P.toeplitzhankel.R,P.toeplitzhankel.L,P.toeplitzhankel.T,P.toeplitzhankel.tmp
+
+        _cheb2leg_rescale1!(V)
+        _th_applymul1!(view(V,2:m,:), T1, L1, R1, tmp)
+        _cheb2leg_rescale1!(transpose(V))
+        _th_applymul2!(view(V,:,2:n), T2, L2, R2, tmp)
+    end
+    V
+end
+
+
+
+function _leg2chebTH_TLC(::Type{S}, mn, d) where S
+    n = mn[d]
+    λ = Λ.(0:half(real(S)):n-1)
+    t = zeros(S,n)
+    t[1:2:end] .= 2 .* view(λ, 1:2:n) ./ π
+    C = hankel_partialchol(λ)
+    T = plan_uppertoeplitz!(t, (mn..., size(C,2)), d)
+    L = copy(C)
+    L[1,:] ./= 2
+    T,L,C
+end
+
+function _leg2chebuTH_TLC(::Type{S}, mn, d) where {S}
+    n = mn[d]
+    S̃ = real(S)
+    λ = Λ.(0:half(S̃):n-1)
+    t = zeros(S,n)
+    t[1:2:end] = λ[1:2:n]./(((1:2:n).-2))
+    h = λ./((1:2n-1).+1)
+    C = hankel_partialchol(h)
+    T = plan_uppertoeplitz!(-2t/π, (length(t), size(C,2)), 1)
+    (T, (1:n) .* C, C)
+end
+
+
+for f in (:leg2cheb, :leg2chebu)
+    plan = Symbol("plan_th_", f, "!")
+    TLC = Symbol("_", f, "TH_TLC")
+    @eval begin
+        $plan(::Type{S}, mn::Tuple, dims::Int) where {S} = ToeplitzHankelPlan($TLC(S, mn, dims)..., dims)
+
+        function $plan(::Type{S}, mn::NTuple{2,Int}, dims::NTuple{2,Int}) where {S}
+            @assert dims == (1,2)
+            T1,L1,C1 = $TLC(S, mn, 1)
+            T2,L2,C2 = $TLC(S, mn, 2)
+            ToeplitzHankelPlan((T1,T2), (L1,L2), (C1,C2), dims)
+        end
+    end
+end
+
+_sub_dim_by_one(d) = ()
+_sub_dim_by_one(d, m, n...) = (isone(d) ? m-1 : m, _sub_dim_by_one(d-1, n...)...)
+
+function _cheb2legTH_TLC(::Type{S}, mn, d) where S
+    n = mn[d]
+    t = zeros(S,n-1)
+    S̃ = real(S)
+    if n > 1
+        t[1:2:end] = Λ.(0:one(S̃):div(n-2,2), -half(S̃), one(S̃))
+    end
+    h = Λ.(1:half(S̃):n-1, zero(S̃), 3half(S̃))
+    DL = (3half(S̃):n-half(S̃))
+    DR = -(one(S̃):n-one(S̃))./4
+    C = hankel_partialchol(h)
+    T = plan_uppertoeplitz!(t, (_sub_dim_by_one(d, mn...)..., size(C,2)), d)
+    T, DL .* C, DR .* C
+end
+
+plan_th_cheb2leg!(::Type{S}, mn::Tuple, dims::Int) where {S} = ChebyshevToLegendrePlanTH(ToeplitzHankelPlan(_cheb2legTH_TLC(S, mn, dims)..., dims))
+
+function plan_th_cheb2leg!(::Type{S}, mn::NTuple{2,Int}, dims::NTuple{2,Int}) where {S}
+    @assert dims == (1,2)
+    T1,L1,C1 = _cheb2legTH_TLC(S, mn, 1)
+    T2,L2,C2 = _cheb2legTH_TLC(S, mn, 2)
+    ChebyshevToLegendrePlanTH(ToeplitzHankelPlan((T1,T2), (L1,L2), (C1,C2), dims))
+end
+
+function plan_th_ultra2ultra!(::Type{S}, (n,)::Tuple{Int}, λ₁, λ₂) where {S}
+    @assert abs(λ₁-λ₂) < 1
+    S̃ = real(S)
+    DL = (zero(S̃):n-one(S̃)) .+ λ₂
+    jk = 0:half(S̃):n-1
+    t = zeros(S,n)
+    t[1:2:n] = Λ.(jk,λ₁-λ₂,one(S̃))[1:2:n]
+    h = Λ.(jk,λ₁,λ₂+one(S̃))
+    lmul!(gamma(λ₂)/gamma(λ₁),h)
+    C = hankel_partialchol(h)
+    T = plan_uppertoeplitz!(lmul!(inv(gamma(λ₁-λ₂)),t), (length(t), size(C,2)), 1)
+    ToeplitzHankelPlan(T, DL .* C, C)
+end
+
+function alternatesign!(v)
+    @inbounds for k = 2:2:length(v)
+        v[k] = -v[k]
+    end
+    v
+end
+
+function plan_th_jac2jac!(::Type{S}, (n,), α, β, γ, δ) where {S}
+    if β == δ
+        @assert abs(α-γ) < 1
+        @assert α+β > -1
+        jk = 0:n-1
+        DL = (2jk .+ γ .+ β .+ 1).*Λ.(jk,γ+β+1,β+1)
+        t = convert(AbstractVector{S}, Λ.(jk, α-γ,1))
+        h = Λ.(0:2n-2,α+β+1,γ+β+2)
+        DR = Λ.(jk,β+1,α+β+1)./gamma(α-γ)
+        C = hankel_partialchol(h)
+        T = plan_uppertoeplitz!(t, (length(t), size(C,2)), 1)
+    elseif α == γ
+        jk = 0:n-1
+        DL = (2jk .+ δ .+ α .+ 1).*Λ.(jk,δ+α+1,α+1)
+        h = Λ.(0:2n-2,α+β+1,δ+α+2)
+        DR = Λ.(jk,α+1,α+β+1)./gamma(β-δ)
+        t = alternatesign!(convert(AbstractVector{S}, Λ.(jk,β-δ,1)))
+        C = hankel_partialchol(h)
+        T = plan_uppertoeplitz!(t, (length(t), size(C,2)), 1)
+    else
+        throw(ArgumentError("Cannot create Toeplitz dot Hankel, use a sequence of plans."))
+    end
+
+    ToeplitzHankelPlan(T, DL .* C, DR .* C)
+end
+
+for f in (:th_leg2cheb, :th_cheb2leg, :th_leg2chebu)
+    plan = Symbol("plan_", f, "!")
+    @eval begin
+        $plan(::Type{S}, mn::NTuple{N,Int}, dims::UnitRange) where {N,S} = $plan(S, mn, tuple(dims...))
+        $plan(::Type{S}, mn::Tuple{Int}, dims::Tuple{Int}=(1,)) where {S} = $plan(S, mn, dims...)
+        $plan(::Type{S}, (m,n)::NTuple{2,Int}) where {S} = $plan(S, (m,n), (1,2))
+        $plan(arr::AbstractArray{T}, dims...) where T = $plan(T, size(arr), dims...)
+        $f(v, dims...) = $plan(eltype(v), size(v), dims...)*copy(v)
+    end
+end
+
+th_ultra2ultra(v, λ₁, λ₂, dims...) = plan_th_ultra2ultra!(eltype(v),size(v),λ₁,λ₂, dims...)*copy(v)
+th_jac2jac(v, α, β, γ, δ, dims...) = plan_th_jac2jac!(eltype(v),size(v),α,β,γ,δ, dims...)*copy(v)
\ No newline at end of file
diff --git a/src/toeplitzplans.jl b/src/toeplitzplans.jl
new file mode 100644
index 00000000..ab08d1f7
--- /dev/null
+++ b/src/toeplitzplans.jl
@@ -0,0 +1,223 @@
+using FFTW
+import FFTW: plan_r2r!
+
+struct ToeplitzPlan{T, N, M, S, VECS<:Tuple{Vararg{Vector{S}}}, P<:Plan{S}, Pi<:Plan{S}} <: Plan{T} # T: eltype of the arrays it is applied to, N: ndims of tmp, M: number of dims acted on, S: workspace eltype
+    vectors::VECS # one vector per dimension acted on; multiplied entrywise into tmp between dft and idft
+    tmp::Array{S,N} # workspace the input is copied into and zero-padded
+    dft::P # plan applied to tmp before the multiplication
+    idft::Pi # plan applied to tmp after the multiplication
+    dims::NTuple{M,Int} # dimensions along which the plan acts
+end
+
+ToeplitzPlan{T}(v::AbstractVector, tmp, dft, idft, dims) where T = ToeplitzPlan{T}((v,), tmp, dft, idft, dims) # single vector: wrap it in a 1-tuple
+ToeplitzPlan{T}(v::Tuple{Vararg{Vector{S}}}, tmp::Array{S,N}, dft::Plan{S}, idft::Plan{S}, dims::NTuple{M,Int}) where {T,S,N,M} = ToeplitzPlan{T,N,M,S,typeof(v),typeof(dft), typeof(idft)}(v, tmp, dft, idft, dims) # fill in the remaining type parameters from the arguments
+ToeplitzPlan{T}(v::Tuple{Vararg{Vector{S}}}, tmp::Array{S,N}, dft::Plan{S}, idft::Plan{S}, dims::Int) where {T,S,N} = ToeplitzPlan{T}(v, tmp, dft, idft, (dims,)) # single Int dimension: wrap it in a 1-tuple
+
+size(A::ToeplitzPlan{<:Any,1}) = ((length(A.tmp)+1) ÷ 2,) # workspace holds 2n-1 entries (0 when n == 0), so this recovers n
+function size(A::ToeplitzPlan{<:Any,2,1}) # matrix plan acting along a single dimension
+    if A.dims == (1,)
+        ((size(A.tmp,1)+1) ÷ 2, size(A.tmp,2)) # only the first dimension of tmp is padded
+    else # A.dims == (2,)
+        (size(A.tmp,1), (size(A.tmp,2)+1) ÷ 2)
+    end
+end
+
+function size(A::ToeplitzPlan{<:Any,3,1}) # 3-tensor plan acting along a single dimension
+    if A.dims == (1,)
+        ((size(A.tmp,1)+1) ÷ 2, size(A.tmp,2), size(A.tmp,3))
+    elseif A.dims == (2,)
+        (size(A.tmp,1), (size(A.tmp,2)+1) ÷ 2, size(A.tmp,3))
+    else # any other dims value is treated as (3,)
+        (size(A.tmp,1), size(A.tmp,2), (size(A.tmp,3)+1) ÷ 2)
+    end
+end
+
+
+size(A::ToeplitzPlan{<:Any,2,2}) = ((size(A.tmp,1)+1) ÷ 2, (size(A.tmp,2)+1) ÷ 2) # both dimensions of tmp are padded
+
+# based on ToeplitzMatrices.jl
+"""
+    maybereal(::Type{T}, x)
+
+Return real-valued part of `x` if `T` is a type of a real number, and `x` otherwise.
+"""
+maybereal(::Type, x) = x # fallback for every other T: pass `x` through unchanged
+maybereal(::Type{<:Real}, x) = real(x) # real T: keep only the real part
+
+function *(A::ToeplitzPlan{T,1}, x::AbstractVector{T}) where T # apply the plan to `x`, overwriting and returning `x`
+    vc,tmp,dft,idft = A.vectors[1],A.tmp, A.dft,A.idft
+    S = eltype(tmp)
+    N = length(tmp)
+    n = length(x)
+    if 2n-1 ≠ N # the workspace must hold exactly 2n-1 entries
+        throw(DimensionMismatch("Toeplitz plan does not match size of input"))
+    end
+    copyto!(view(tmp, 1:n), x) # load the input into the head of the workspace
+    fill!(view(tmp, n+1:N), zero(S)) # zero the remaining n-1 entries
+    dft * tmp # result is not assigned: the plan is relied on to update tmp itself
+    tmp .*= vc # entrywise multiplication by the plan's stored vector
+    idft * tmp
+    @inbounds for k = 1:n
+        x[k] = maybereal(T, tmp[k]) # the first n entries are written back; imaginary part dropped when T is real
+    end
+    x
+end
+
+function *(A::ToeplitzPlan{T,2,1, S}, x::AbstractMatrix{T}) where {T,S} # apply along A.dims, overwriting and returning `x`
+    vc,tmp,dft,idft = A.vectors[1],A.tmp, A.dft, A.idft
+    M,N = size(tmp)
+    m,n = size(x) # NOTE(review): unlike the vector method, sizes are not checked against the plan
+
+    if isempty(x)
+        return x # nothing to transform
+    end
+
+    if A.dims == (1,)
+        copyto!(view(tmp, 1:m, :), x) # load x into the top m rows of the workspace
+        fill!(view(tmp, m+1:M, :), zero(S)) # zero the rows below it
+        if !isempty(tmp)
+            dft * tmp
+        end
+        tmp .= vc .* tmp # vc broadcasts down each column
+    else
+        @assert A.dims == (2,)
+        copyto!(view(tmp, :, 1:n), x) # load x into the first n columns
+        fill!(view(tmp, :, n+1:N), zero(S)) # zero the columns to its right
+        dft * tmp
+        tmp .= tmp .* transpose(vc) # vc broadcasts along each row
+    end
+    idft * tmp
+    x .= maybereal.(T, view(tmp,1:m,1:n)) # the leading m × n block is written back into x
+end
+
+
+function *(A::ToeplitzPlan{T,2,2, S}, X::AbstractMatrix{T}) where {T,S} # apply along both dimensions, overwriting and returning `X`
+    vcs,tmp,dft,idft = A.vectors,A.tmp, A.dft,A.idft
+    vc1,vc2 = vcs # one stored vector per dimension
+    M,N = size(tmp)
+    m,n = size(X)
+
+    @assert A.dims == (1,2)
+    copyto!(view(tmp, 1:m, 1:n), X) # load X into the leading m × n block
+    fill!(view(tmp, m+1:M, :), zero(S)) # zero the rows below X
+    fill!(view(tmp, 1:m, n+1:N), zero(S)) # and the columns to its right
+    dft * tmp
+    tmp .= vc1 .* tmp .* transpose(vc2) # vc1 broadcasts along dim 1, vc2 along dim 2
+    idft * tmp
+    @inbounds for k = 1:m, j = 1:n
+        X[k,j] = maybereal(T, tmp[k,j]) # write the leading block back, dropping the imaginary part when T is real
+    end
+    X
+end
+
+function *(A::ToeplitzPlan{T,3,1, S}, x::AbstractArray{T,3}) where {T,S} # apply along the single dimension in A.dims, overwriting and returning `x`
+    vc,tmp,dft,idft = A.vectors[1],A.tmp, A.dft,A.idft
+    M,N,L = size(tmp)
+    m,n,l = size(x) # NOTE(review): sizes are not checked against the plan, and empty input is not special-cased
+
+    if A.dims == (1,)
+        copyto!(view(tmp, 1:m, :, :), x) # load x, then zero the padding along dim 1
+        fill!(view(tmp, m+1:M, :, :), zero(S))
+        dft * tmp
+        tmp .= vc .* tmp # vc broadcasts along dim 1
+    elseif A.dims == (2,)
+        copyto!(view(tmp, :, 1:n, :), x)
+        fill!(view(tmp, :, n+1:N, :), zero(S))
+        dft * tmp
+        tmp .= tmp .* transpose(vc) # the 1 × N row broadcasts along dim 2
+    else # any other dims value is treated as (3,)
+        copyto!(view(tmp, :, :, 1:l), x)
+        fill!(view(tmp, :, :, l+1:L), zero(S))
+        dft * tmp
+        tmp .= tmp .* reshape(vc, 1, 1, L) # vc reshaped so it broadcasts along dim 3
+    end
+    idft * tmp
+    @inbounds for k = 1:m, j = 1:n, ℓ = 1:l
+        x[k,j,ℓ] = maybereal(T, tmp[k,j,ℓ]) # write the leading m × n × l block back
+    end
+    x
+end
+
+
+
+function uppertoeplitz_padvec(v::AbstractVector{T}) where T # embed `v` in a complex vector of length 2n-1
+    n = length(v)
+    S = complex(float(T))
+    tmp = zeros(S, max(0,2n-1)) # max guards n == 0, where 2n-1 would be negative
+    if n ≠ 0
+        tmp[1] = v[1] # entries 2:n stay zero
+        copyto!(tmp, n+1, Iterators.reverse(v), 1, n-1) # tmp[n+1:2n-1] = v[n], v[n-1], …, v[2]
+    end
+    tmp
+end
+
+function plan_uppertoeplitz!(v::AbstractVector{T}) where T # plan for vectors of the same length as `v`
+    tmp = uppertoeplitz_padvec(v)
+    dft = plan_fft!(tmp)
+    idft = plan_ifft!(similar(tmp)) # planned on a scratch array of the same size
+    return ToeplitzPlan{float(T)}(dft * tmp, similar(tmp), dft, idft, (1,)) # stores the transformed padded vector plus a fresh workspace
+end
+
+# TODO: support different transforms
+# function plan_uppertoeplitz!(v1::AbstractVector{T}, v2::AbstractVector{T}) where T
+#     S = float(T)
+#     m,n = length(v1), length(v2)
+#     tmp = zeros(S, 2m-1, 2n-1)
+#     pv1 = uppertoeplitz_padvec(v1)
+#     pv2 = uppertoeplitz_padvec(v2)
+#     dft = plan_r2r!(tmp, FFTW.R2HC)
+#     return ToeplitzPlan((r2r!(pv1, FFTW.R2HC), r2r!(pv2, FFTW.R2HC)), tmp, dft, 1:2)
+# end
+
+function plan_uppertoeplitz!(v::AbstractVector{T}, szs::NTuple{2,Int}, dim::Int) where T # plan for szs-sized matrices, acting along `dim`
+    S = complex(float(T))
+    m,n = szs
+    if isone(dim)
+        tmp = zeros(S, max(0,2m-1), n) # only the transformed dimension is padded
+        pv = uppertoeplitz_padvec(v[1:m]) # uses the first m entries of v
+    else # dim == 2
+        tmp = zeros(S, m, max(0,2n-1))
+        pv = uppertoeplitz_padvec(v[1:n]) # uses the first n entries of v
+    end
+    if isempty(tmp)
+        # dummy plans just to create type
+        dft = plan_fft!(similar(tmp, 1, 1), dim)
+        idft = plan_ifft!(similar(tmp, 1, 1), dim)
+        ToeplitzPlan{float(T)}(pv, tmp, dft, idft, dim) # pv is stored untransformed in this branch
+    else
+        dft = plan_fft!(tmp, dim)
+        idft = plan_ifft!(similar(tmp), dim)
+        return ToeplitzPlan{float(T)}(fft!(pv), tmp, dft, idft, dim) # pv is transformed once, at planning time
+    end
+end
+
+function plan_uppertoeplitz!(v::AbstractVector{T}, szs::NTuple{3,Int}, dim::Int) where T # plan for szs-sized 3-tensors, acting along `dim`
+    S = complex(float(T))
+    m,n,l = szs
+    if isone(dim)
+        tmp = zeros(S, 2m-1, n, l) # NOTE(review): no max(0, …) guard or empty-array branch, unlike the matrix method — confirm zero-length dims are unsupported
+        pv = uppertoeplitz_padvec(v[1:m]) # uses the first m entries of v
+    elseif dim == 2
+        tmp = zeros(S, m, 2n-1, l)
+        pv = uppertoeplitz_padvec(v[1:n])
+    else
+        @assert dim == 3
+        tmp = zeros(S, m, n, 2l-1)
+        pv = uppertoeplitz_padvec(v[1:l])
+    end
+    dft = plan_fft!(tmp, dim)
+    idft = plan_ifft!(similar(tmp), dim)
+    return ToeplitzPlan{float(T)}(fft!(pv), tmp, dft, idft, dim) # pv is transformed once, at planning time
+end
+
+function plan_uppertoeplitz!(v::AbstractVector{T}, szs::NTuple{2,Int}, dim=(1,2)) where T # plan acting along both dimensions of an szs-sized matrix
+    @assert dim == (1,2) # the only value this method accepts
+    S = complex(float(T))
+    m,n = szs
+    tmp = zeros(S, 2m-1, 2n-1) # both dimensions are padded
+    pv1 = uppertoeplitz_padvec(v[1:m]) # the same v supplies both factors, truncated to each dimension's length
+    pv2 = uppertoeplitz_padvec(v[1:n])
+    dft = plan_fft!(tmp, dim)
+    idft = plan_ifft!(similar(tmp), dim)
+    return ToeplitzPlan{float(T)}((fft!(pv1), fft!(pv2)), tmp, dft, idft, dim) # one transformed vector per dimension
+end
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index e224666d..061614cd 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -18,7 +18,7 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         B = T[zero(T) for k in 0:length(c)-1]
         C = T[k/(k+one(T)) for k in 0:length(c)]
         phi0 = ones(T, length(x))
-        c = cheb2leg(c)
+        c = FastTransforms.lib_cheb2leg(c)
         @test FastTransforms.clenshaw!(c, A, B, C, x, phi0, f) == f
         @test f ≈ fd
     end
diff --git a/test/runtests.jl b/test/runtests.jl
index 8a3790a7..1efb21e9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -8,5 +8,6 @@ include("nuffttests.jl")
 include("paduatests.jl")
 include("gaunttests.jl")
 include("hermitetests.jl")
-include("toeplitztests.jl")
 include("clenshawtests.jl")
+include("toeplitzplanstests.jl")
+include("toeplitzhankeltests.jl")
\ No newline at end of file
diff --git a/test/specialfunctionstests.jl b/test/specialfunctionstests.jl
index febad808..85a2e1f9 100644
--- a/test/specialfunctionstests.jl
+++ b/test/specialfunctionstests.jl
@@ -1,6 +1,6 @@
 using FastTransforms, LinearAlgebra, Test
 
-import FastTransforms: pochhammer, sqrtpi, SpecialFunctions.gamma
+import FastTransforms: pochhammer, sqrtpi, gamma, lgamma
 import FastTransforms: Cnλ, Λ, lambertw, Cnαβ, Anαβ
 import FastTransforms: chebyshevmoments1, chebyshevmoments2, chebyshevjacobimoments1, chebyshevjacobimoments2, chebyshevlogmoments1, chebyshevlogmoments2
 
@@ -36,4 +36,10 @@ import FastTransforms: chebyshevmoments1, chebyshevmoments2, chebyshevjacobimome
 
     @test norm(Cnαβ.(n,α,β) ./ Cnαβ.(n,big(α),big(β)) .- 1, Inf) < 3eps()
     @test norm(Anαβ.(n,α,β) ./ Anαβ.(n,big(α),big(β)) .- 1, Inf) < 4eps()
+
+    @testset "BigFloat bug" begin
+        @test Λ(0.0, -1/2, 1.0) ≈ -exp(lgamma(-1/2)-lgamma(1.0))
+        @test Λ(1.0, -1/2, 1.0) ≈ exp(lgamma(1-1/2)-lgamma(2.0))
+        @test Float64(Λ(big(0.0), -1/2, 1.0)) ≈ Λ(0.0, -1/2, 1.0)
+    end
 end
diff --git a/test/toeplitzhankeltests.jl b/test/toeplitzhankeltests.jl
new file mode 100644
index 00000000..93ce70bf
--- /dev/null
+++ b/test/toeplitzhankeltests.jl
@@ -0,0 +1,40 @@
+using FastTransforms, Test
+import FastTransforms: th_leg2cheb, th_cheb2leg, th_ultra2ultra,th_jac2jac, th_leg2chebu,
+                        lib_leg2cheb, lib_cheb2leg, lib_ultra2ultra, lib_jac2jac,
+                        plan_th_cheb2leg!, plan_th_leg2cheb!
+
+@testset "ToeplitzHankel" begin
+    for x in ([1.0], [1.0,2,3,4,5], [1.0+im,2-3im,3+4im,4-5im,5+10im])
+        @test th_leg2cheb(x) ≈ lib_leg2cheb(x)
+        @test th_cheb2leg(x) ≈ lib_cheb2leg(x)
+        @test th_leg2chebu(x) ≈ lib_ultra2ultra(x, 0.5, 1.0)
+        @test th_ultra2ultra(x,0.1, 0.2) ≈ lib_ultra2ultra(x, 0.1, 0.2)
+        @test th_jac2jac(x,0.1, 0.2,0.1,0.4) ≈ lib_jac2jac(x, 0.1, 0.2,0.1,0.4)
+        @test th_jac2jac(x,0.1, 0.2,0.3,0.2) ≈ lib_jac2jac(x, 0.1, 0.2,0.3,0.2)
+
+        @test all(th_leg2cheb(x) .=== leg2cheb(x))
+        @test all(th_cheb2leg(x) .=== cheb2leg(x))
+    end
+
+    for X in (randn(5,4), randn(5,4) + im*randn(5,4))
+        @test th_leg2cheb(X, 1) ≈ hcat([leg2cheb(X[:,j]) for j=1:size(X,2)]...)
+        @test th_leg2cheb(X, 2) ≈ vcat([permutedims(leg2cheb(X[k,:])) for k=1:size(X,1)]...)
+        @test th_leg2cheb(X) ≈ th_leg2cheb(th_leg2cheb(X, 1), 2)
+
+        @test th_cheb2leg(X, 1) ≈ hcat([cheb2leg(X[:,j]) for j=1:size(X,2)]...)
+        @test th_cheb2leg(X, 2) ≈ vcat([permutedims(cheb2leg(X[k,:])) for k=1:size(X,1)]...)
+        @test th_cheb2leg(X) ≈ th_cheb2leg(th_cheb2leg(X, 1), 2)
+
+        @test th_cheb2leg(X) == plan_th_cheb2leg!(X, 1:2)*copy(X)
+        @test th_leg2cheb(X) == plan_th_leg2cheb!(X, 1:2)*copy(X)
+
+        @test th_leg2cheb(th_cheb2leg(X)) ≈ X
+    end
+
+    @testset "BigFloat" begin
+        n = 10
+        x = big.(collect(1.0:n))
+        @test leg2cheb(x) ≈ lib_leg2cheb(x)
+        @test cheb2leg(x) ≈ lib_cheb2leg(x)
+    end
+end
\ No newline at end of file
diff --git a/test/toeplitzplanstests.jl b/test/toeplitzplanstests.jl
new file mode 100644
index 00000000..e56d8c3e
--- /dev/null
+++ b/test/toeplitzplanstests.jl
@@ -0,0 +1,63 @@
+using FastTransforms, Test
+import FastTransforms: plan_uppertoeplitz!
+
+@testset "ToeplitzPlan" begin # each case compares a plan against the dense upper-triangular Toeplitz matrix
+    @testset "Vector" begin
+        P = plan_uppertoeplitz!([1,2,3])
+        T = [1 2 3; 0 1 2; 0 0 1] # dense matrix whose first row is the plan's vector
+        x = randn(3)
+        @test P * copy(x) ≈ T * x # copy: applying the plan overwrites its argument
+    end
+
+    @testset "Matrix" begin
+        T = [1 2 3; 0 1 2; 0 0 1]
+
+        X = randn(3,3)
+        P = plan_uppertoeplitz!([1,2,3], size(X), 1)
+        @test P * copy(X) ≈ T * X # along dim 1: left-multiplication by T
+        P = plan_uppertoeplitz!([1,2,3], size(X), 2)
+        @test P * copy(X) ≈ X * T' # along dim 2: right-multiplication by T'
+
+        P = plan_uppertoeplitz!([1,2,3], size(X))
+        @test P * copy(X) ≈ T * X * T' # no dim given: both dimensions
+
+        X = randn(3,4) # rectangular case
+        P1 = plan_uppertoeplitz!([1,2,3], size(X), 1)
+        @test P1 * copy(X) ≈ T * X
+        P2 = plan_uppertoeplitz!([1,2,3,4], size(X), 2)
+        T̃ = [1 2 3 4; 0 1 2 3; 0 0 1 2; 0 0 0 1] # 4 × 4 matrix for the second dimension
+        @test P2 * copy(X) ≈ X * T̃'
+        P = plan_uppertoeplitz!([1,2,3,4], size(X)) # one vector of length 4 serves both dimensions
+        @test P * copy(X) ≈ T * X * T̃'
+    end
+
+    @testset "Tensor" begin
+        T = [1 2 3; 0 1 2; 0 0 1]
+        
+        X = randn(3,3,3)
+        P = plan_uppertoeplitz!([1,2,3], size(X), 1)
+        PX = P * copy(X)
+        for ℓ = 1:size(X,3)
+            @test PX[:,:,ℓ] ≈ T*X[:,:,ℓ] # checked slice by slice along dim 3
+        end
+
+        P = plan_uppertoeplitz!([1,2,3], size(X), 2)
+        PX = P * copy(X)
+        for ℓ = 1:size(X,3)
+            @test PX[:,:,ℓ] ≈ X[:,:,ℓ]*T'
+        end
+
+        P = plan_uppertoeplitz!([1,2,3], size(X), 3)
+        PX = P * copy(X)
+        for j = 1:size(X,2)
+            @test PX[:,j,:] ≈ X[:,j,:]*T' # checked slice by slice along dim 2
+        end
+    end
+
+    @testset "BigFloat" begin
+        P = plan_uppertoeplitz!([big(π),2,3])
+        T = [big(π) 2 3; 0 big(π) 2; 0 0 big(π)]
+        x = randn(3) # NOTE(review): x is Float64 while the plan is built from BigFloat — presumably a converting `*` fallback applies; confirm
+        @test P * copy(x) ≈ T * x
+    end
+end
\ No newline at end of file
diff --git a/test/toeplitztests.jl b/test/toeplitztests.jl
deleted file mode 100644
index f11bd6d1..00000000
--- a/test/toeplitztests.jl
+++ /dev/null
@@ -1,26 +0,0 @@
-using FastTransforms, Test, ToeplitzMatrices
-
-@testset "BigFloat Toeplitz" begin
-    T = Toeplitz(BigFloat[1,2,3,4,5], BigFloat[1,6,7,8,0])
-    @test T*ones(BigFloat,5) ≈ [22,24,19,16,15]
-
-    let n = 512
-        r = map(BigFloat,rand(n))
-        T = Toeplitz(r,[r[1];map(BigFloat,rand(n-1))])
-        @test T*ones(BigFloat,n) ≈ Matrix(T)*ones(BigFloat,n)
-
-        T = TriangularToeplitz(BigFloat[1,2,3,4,5],:L)
-        @test T*ones(BigFloat,5) ≈ Matrix(T)*ones(BigFloat,5)
-
-        r = map(BigFloat,rand(n))
-        T = TriangularToeplitz(r,:L)
-        @test T*ones(BigFloat,n) ≈ Matrix(T)*ones(BigFloat,n)
-
-        T = TriangularToeplitz(BigFloat[1,2,3,4,5],:U)
-        @test T*ones(BigFloat,5) ≈ Matrix(T)*ones(BigFloat,5)
-
-        r = map(BigFloat,rand(n))
-        T = TriangularToeplitz(r,:U)
-        @test T*ones(BigFloat,n) ≈ Matrix(T)*ones(BigFloat,n)
-    end
-end

From 4e9779218779c7de65855fa37950267056df9bf6 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 27 Mar 2023 19:13:45 +0100
Subject: [PATCH 132/222] Fix cheb2leg accuracy

---
 src/toeplitzhankel.jl       | 37 ++++++++++++++++++++++++++++++++++---
 test/toeplitzhankeltests.jl | 14 +++++++++++++-
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index a5c18572..97a033c0 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -102,6 +102,36 @@ function hankel_partialchol(v::Vector{T}) where T
     C[:,1:r]
 end
 
+# cholesky for D .* H .* D'
+function hankel_partialchol(v::Vector, D::AbstractVector)
+    T = promote_type(eltype(v), eltype(D))
+    # Assumes positive definite
+    σ = T[]
+    n = isempty(v) ? 0 : (length(v)+2) ÷ 2
+    C = Matrix{T}(undef, n, n)
+    d = v[1:2:end] .* D.^2 # diag of D .* H .* D'
+    @assert length(v) ≥ 2n-1
+    reltol = maximum(abs,d)*eps(T)*log(n)
+    r = 0
+    for k = 1:n
+        mx,idx = findmax(d)
+        if mx ≤ reltol break end
+        push!(σ, inv(mx))
+        C[:,k] .= view(v,idx:n+idx-1) .*D.*D[idx]
+        for j = 1:k-1
+            nCjidxσj = -C[idx,j]*σ[j]
+            LinearAlgebra.axpy!(nCjidxσj, view(C,:,j), view(C,:,k))
+        end
+        @inbounds for p=1:n
+            d[p] -= C[p,k]^2/mx
+        end
+        r += 1
+    end
+    for k=1:length(σ) rmul!(view(C,:,k), sqrt(σ[k])) end
+    C[:,1:r]
+end
+
+
 
 # Diagonally-scaled Toeplitz∘Hankel polynomial transforms
 
@@ -209,9 +239,10 @@ function _cheb2legTH_TLC(::Type{S}, mn, d) where S
         t[1:2:end] = Λ.(0:one(S̃):div(n-2,2), -half(S̃), one(S̃))
     end
     h = Λ.(1:half(S̃):n-1, zero(S̃), 3half(S̃))
-    DL = (3half(S̃):n-half(S̃))
-    DR = -(one(S̃):n-one(S̃))./4
-    C = hankel_partialchol(h)
+    D = 1:one(S):n-1
+    DL = (3half(S̃):n-half(S̃)) ./ D
+    DR = -(one(S̃):n-one(S̃)) ./ (4 .* D)
+    C = hankel_partialchol(h, D)
     T = plan_uppertoeplitz!(t, (_sub_dim_by_one(d, mn...)..., size(C,2)), d)
     T, DL .* C, DR .* C
 end
diff --git a/test/toeplitzhankeltests.jl b/test/toeplitzhankeltests.jl
index 93ce70bf..48f44ee7 100644
--- a/test/toeplitzhankeltests.jl
+++ b/test/toeplitzhankeltests.jl
@@ -4,7 +4,7 @@ import FastTransforms: th_leg2cheb, th_cheb2leg, th_ultra2ultra,th_jac2jac, th_l
                         plan_th_cheb2leg!, plan_th_leg2cheb!
 
 @testset "ToeplitzHankel" begin
-    for x in ([1.0], [1.0,2,3,4,5], [1.0+im,2-3im,3+4im,4-5im,5+10im])
+    for x in ([1.0], [1.0,2,3,4,5], [1.0+im,2-3im,3+4im,4-5im,5+10im], collect(1.0:1000))
         @test th_leg2cheb(x) ≈ lib_leg2cheb(x)
         @test th_cheb2leg(x) ≈ lib_cheb2leg(x)
         @test th_leg2chebu(x) ≈ lib_ultra2ultra(x, 0.5, 1.0)
@@ -14,6 +14,9 @@ import FastTransforms: th_leg2cheb, th_cheb2leg, th_ultra2ultra,th_jac2jac, th_l
 
         @test all(th_leg2cheb(x) .=== leg2cheb(x))
         @test all(th_cheb2leg(x) .=== cheb2leg(x))
+
+        @test th_cheb2leg(th_leg2cheb(x)) ≈ x atol=1E-9
+        @test th_leg2leg(th_cheb2cheb(x)) ≈ x atol=1E-11
     end
 
     for X in (randn(5,4), randn(5,4) + im*randn(5,4))
@@ -37,4 +40,13 @@ import FastTransforms: th_leg2cheb, th_cheb2leg, th_ultra2ultra,th_jac2jac, th_l
         @test leg2cheb(x) ≈ lib_leg2cheb(x)
         @test cheb2leg(x) ≈ lib_cheb2leg(x)
     end
+
+    @testset "jishnub example" begin
+        x = chebyshevpoints(4096);
+        f = x -> cospi(1000x);  
+        y = f.(x);
+        v = cheb2leg(chebyshevtransform(y))
+        @test norm(v - cheb2leg(leg2cheb(v)), Inf) ≤ 1E-13
+        @test norm(v - cheb2leg(leg2cheb(v)))/norm(v) ≤ 1E-14
+    end
 end
\ No newline at end of file

From 13ea614e19a035a8be014423d91ed411ae9bc8a4 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 27 Mar 2023 19:53:58 +0100
Subject: [PATCH 133/222] fix TH tests (#207)

* fix TH tests, only use TH for vectors since higher dimensions will not have a 100k per dimension

* adjust tol

* stick to TH for now with matrices since Lib doesn't have matrix interface

* v0.15.1
---
 Project.toml                |  2 +-
 src/FastTransforms.jl       |  8 ++++++--
 src/toeplitzhankel.jl       |  2 +-
 test/toeplitzhankeltests.jl | 14 +++++++-------
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/Project.toml b/Project.toml
index 8ba51b09..87c26d69 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15"
+version = "0.15.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index dcf15d7e..58ca3a23 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -120,13 +120,17 @@ for f in (:jac2jac,
     :sph2fourier, :sphv2fourier, :disk2cxf, :ann2cxf,
     :rectdisk2cheb, :tri2cheb, :tet2cheb)
     lib_f = Symbol("lib_", f)
-    @eval $f(x::AbstractArray, y...; z...) = $lib_f(x::AbstractArray, y...; z...)
+    @eval $f(x::AbstractArray, y...; z...) = $lib_f(x, y...; z...)
 end
 
 # following use Toeplitz-Hankel to avoid expensive plans
 for f in (:leg2cheb, :cheb2leg, :ultra2ultra)
     th_f = Symbol("th_", f)
-    @eval $f(x::AbstractArray, y...; z...) = $th_f(x::AbstractArray, y...; z...)
+    lib_f = Symbol("lib_", f)
+    @eval begin
+        $f(x::AbstractArray, y...; z...) = $th_f(x, y...; z...)
+        # $f(x::AbstractArray, y...; z...) = $lib_f(x, y...; z...)
+    end
 end
 
 
diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index 97a033c0..ddbea6b8 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -239,7 +239,7 @@ function _cheb2legTH_TLC(::Type{S}, mn, d) where S
         t[1:2:end] = Λ.(0:one(S̃):div(n-2,2), -half(S̃), one(S̃))
     end
     h = Λ.(1:half(S̃):n-1, zero(S̃), 3half(S̃))
-    D = 1:one(S):n-1
+    D = 1:n-1
     DL = (3half(S̃):n-half(S̃)) ./ D
     DR = -(one(S̃):n-one(S̃)) ./ (4 .* D)
     C = hankel_partialchol(h, D)
diff --git a/test/toeplitzhankeltests.jl b/test/toeplitzhankeltests.jl
index 48f44ee7..5f7c4c8d 100644
--- a/test/toeplitzhankeltests.jl
+++ b/test/toeplitzhankeltests.jl
@@ -16,17 +16,17 @@ import FastTransforms: th_leg2cheb, th_cheb2leg, th_ultra2ultra,th_jac2jac, th_l
         @test all(th_cheb2leg(x) .=== cheb2leg(x))
 
         @test th_cheb2leg(th_leg2cheb(x)) ≈ x atol=1E-9
-        @test th_leg2leg(th_cheb2cheb(x)) ≈ x atol=1E-11
+        @test th_leg2cheb(th_cheb2leg(x)) ≈ x atol=1E-10
     end
 
     for X in (randn(5,4), randn(5,4) + im*randn(5,4))
-        @test th_leg2cheb(X, 1) ≈ hcat([leg2cheb(X[:,j]) for j=1:size(X,2)]...)
-        @test th_leg2cheb(X, 2) ≈ vcat([permutedims(leg2cheb(X[k,:])) for k=1:size(X,1)]...)
-        @test th_leg2cheb(X) ≈ th_leg2cheb(th_leg2cheb(X, 1), 2)
+        @test th_leg2cheb(X, 1) ≈ leg2cheb(X, 1) ≈ hcat([leg2cheb(X[:,j]) for j=1:size(X,2)]...)
+        @test th_leg2cheb(X, 2) ≈ leg2cheb(X, 2) ≈ vcat([permutedims(leg2cheb(X[k,:])) for k=1:size(X,1)]...)
+        @test th_leg2cheb(X) ≈ leg2cheb(X) ≈ th_leg2cheb(th_leg2cheb(X, 1), 2)
 
-        @test th_cheb2leg(X, 1) ≈ hcat([cheb2leg(X[:,j]) for j=1:size(X,2)]...)
-        @test th_cheb2leg(X, 2) ≈ vcat([permutedims(cheb2leg(X[k,:])) for k=1:size(X,1)]...)
-        @test th_cheb2leg(X) ≈ th_cheb2leg(th_cheb2leg(X, 1), 2)
+        @test th_cheb2leg(X, 1) ≈ cheb2leg(X, 1) ≈ hcat([cheb2leg(X[:,j]) for j=1:size(X,2)]...)
+        @test th_cheb2leg(X, 2) ≈ cheb2leg(X, 2) ≈ vcat([permutedims(cheb2leg(X[k,:])) for k=1:size(X,1)]...)
+        @test th_cheb2leg(X) ≈ cheb2leg(X) ≈ th_cheb2leg(th_cheb2leg(X, 1), 2)
 
         @test th_cheb2leg(X) == plan_th_cheb2leg!(X, 1:2)*copy(X)
         @test th_leg2cheb(X) == plan_th_leg2cheb!(X, 1:2)*copy(X)

From 6cf4e39063522b1ad1ccb227df96a4a62e0da6ee Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 27 Mar 2023 20:29:41 +0100
Subject: [PATCH 134/222] revert to libfasttransforms as default (#208)

---
 src/FastTransforms.jl       | 19 ++++++++++---------
 test/toeplitzhankeltests.jl | 27 ++++++++++++++-------------
 2 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 58ca3a23..c8017b27 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -118,20 +118,21 @@ for f in (:jac2jac,
     :cheb2jac, :ultra2cheb, :cheb2ultra, :associatedjac2jac,
     :modifiedjac2jac, :modifiedlag2lag, :modifiedherm2herm,
     :sph2fourier, :sphv2fourier, :disk2cxf, :ann2cxf,
-    :rectdisk2cheb, :tri2cheb, :tet2cheb)
+    :rectdisk2cheb, :tri2cheb, :tet2cheb,
+    :leg2cheb, :cheb2leg, :ultra2ultra)
     lib_f = Symbol("lib_", f)
     @eval $f(x::AbstractArray, y...; z...) = $lib_f(x, y...; z...)
 end
 
 # following use Toeplitz-Hankel to avoid expensive plans
-for f in (:leg2cheb, :cheb2leg, :ultra2ultra)
-    th_f = Symbol("th_", f)
-    lib_f = Symbol("lib_", f)
-    @eval begin
-        $f(x::AbstractArray, y...; z...) = $th_f(x, y...; z...)
-        # $f(x::AbstractArray, y...; z...) = $lib_f(x, y...; z...)
-    end
-end
+# for f in (:leg2cheb, :cheb2leg, :ultra2ultra)
+#     th_f = Symbol("th_", f)
+#     lib_f = Symbol("lib_", f)
+#     @eval begin
+#         $f(x::AbstractArray, y...; z...) = $th_f(x, y...; z...)
+#         # $f(x::AbstractArray, y...; z...) = $lib_f(x, y...; z...)
+#     end
+# end
 
 
 end # module
diff --git a/test/toeplitzhankeltests.jl b/test/toeplitzhankeltests.jl
index 5f7c4c8d..a3f8d36c 100644
--- a/test/toeplitzhankeltests.jl
+++ b/test/toeplitzhankeltests.jl
@@ -12,21 +12,22 @@ import FastTransforms: th_leg2cheb, th_cheb2leg, th_ultra2ultra,th_jac2jac, th_l
         @test th_jac2jac(x,0.1, 0.2,0.1,0.4) ≈ lib_jac2jac(x, 0.1, 0.2,0.1,0.4)
         @test th_jac2jac(x,0.1, 0.2,0.3,0.2) ≈ lib_jac2jac(x, 0.1, 0.2,0.3,0.2)
 
-        @test all(th_leg2cheb(x) .=== leg2cheb(x))
-        @test all(th_cheb2leg(x) .=== cheb2leg(x))
 
         @test th_cheb2leg(th_leg2cheb(x)) ≈ x atol=1E-9
         @test th_leg2cheb(th_cheb2leg(x)) ≈ x atol=1E-10
     end
 
     for X in (randn(5,4), randn(5,4) + im*randn(5,4))
-        @test th_leg2cheb(X, 1) ≈ leg2cheb(X, 1) ≈ hcat([leg2cheb(X[:,j]) for j=1:size(X,2)]...)
-        @test th_leg2cheb(X, 2) ≈ leg2cheb(X, 2) ≈ vcat([permutedims(leg2cheb(X[k,:])) for k=1:size(X,1)]...)
-        @test th_leg2cheb(X) ≈ leg2cheb(X) ≈ th_leg2cheb(th_leg2cheb(X, 1), 2)
+        @test th_leg2cheb(X, 1) ≈ hcat([leg2cheb(X[:,j]) for j=1:size(X,2)]...)
+        @test_broken th_leg2cheb(X, 1) ≈ leg2cheb(X, 1)
+        @test th_leg2cheb(X, 2) ≈ vcat([permutedims(leg2cheb(X[k,:])) for k=1:size(X,1)]...)
+        @test_broken th_leg2cheb(X, 2) ≈ leg2cheb(X, 2)
+        @test th_leg2cheb(X) ≈ th_leg2cheb(th_leg2cheb(X, 1), 2)
+        @test_broken th_leg2cheb(X) ≈ leg2cheb(X)
 
-        @test th_cheb2leg(X, 1) ≈ cheb2leg(X, 1) ≈ hcat([cheb2leg(X[:,j]) for j=1:size(X,2)]...)
-        @test th_cheb2leg(X, 2) ≈ cheb2leg(X, 2) ≈ vcat([permutedims(cheb2leg(X[k,:])) for k=1:size(X,1)]...)
-        @test th_cheb2leg(X) ≈ cheb2leg(X) ≈ th_cheb2leg(th_cheb2leg(X, 1), 2)
+        @test th_cheb2leg(X, 1) ≈ hcat([cheb2leg(X[:,j]) for j=1:size(X,2)]...)
+        @test th_cheb2leg(X, 2) ≈ vcat([permutedims(cheb2leg(X[k,:])) for k=1:size(X,1)]...)
+        @test th_cheb2leg(X) ≈ th_cheb2leg(th_cheb2leg(X, 1), 2)
 
         @test th_cheb2leg(X) == plan_th_cheb2leg!(X, 1:2)*copy(X)
         @test th_leg2cheb(X) == plan_th_leg2cheb!(X, 1:2)*copy(X)
@@ -37,16 +38,16 @@ import FastTransforms: th_leg2cheb, th_cheb2leg, th_ultra2ultra,th_jac2jac, th_l
     @testset "BigFloat" begin
         n = 10
         x = big.(collect(1.0:n))
-        @test leg2cheb(x) ≈ lib_leg2cheb(x)
-        @test cheb2leg(x) ≈ lib_cheb2leg(x)
+        @test th_leg2cheb(x) ≈ lib_leg2cheb(x)
+        @test th_cheb2leg(x) ≈ lib_cheb2leg(x)
     end
 
     @testset "jishnub example" begin
         x = chebyshevpoints(4096);
         f = x -> cospi(1000x);  
         y = f.(x);
-        v = cheb2leg(chebyshevtransform(y))
-        @test norm(v - cheb2leg(leg2cheb(v)), Inf) ≤ 1E-13
-        @test norm(v - cheb2leg(leg2cheb(v)))/norm(v) ≤ 1E-14
+        v = th_cheb2leg(chebyshevtransform(y))
+        @test norm(v - th_cheb2leg(th_leg2cheb(v)), Inf) ≤ 1E-13
+        @test norm(v - th_cheb2leg(th_leg2cheb(v)))/norm(v) ≤ 1E-14
     end
 end
\ No newline at end of file

From daafeb3e0eb2ccff8e4a4d5869ef5cfe589ebfe3 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 27 Mar 2023 20:34:30 +0100
Subject: [PATCH 135/222] Add tests for #202 and #203

---
 test/libfasttransformstests.jl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index 061614cd..de9d5f78 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -228,3 +228,9 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
     pa = plan_spinsph_analysis(A, 2)
     test_nd_plans(p, ps, pa, A)
 end
+
+@testset "ultra2ultra bug and cheb2leg normalisation (#202, #203)" begin
+    @test ultra2ultra([0.0, 1.0], 1, 1) == [0,1] # equal parameters: coefficients must come back unchanged
+    @test cheb2leg([0.0, 1.0], normcheb=true) ≈ [0.,sqrt(2/π)] # with the normcheb flag set
+    @test cheb2leg([0.0, 1.0], normleg=true) ≈ [0.,sqrt(2/3)] # with the normleg flag set
+end
\ No newline at end of file

From d7be08cbaec7879314701fd706bccf374e046a84 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Tue, 28 Mar 2023 08:57:49 +0100
Subject: [PATCH 136/222] Less allocations in TH (#211)

---
 src/toeplitzhankel.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index ddbea6b8..ff9acc52 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -108,7 +108,7 @@ function hankel_partialchol(v::Vector, D::AbstractVector)
     # Assumes positive definite
     σ = T[]
     n = isempty(v) ? 0 : (length(v)+2) ÷ 2
-    C = Matrix{T}(undef, n, n)
+    C = Matrix{T}(undef, n, 100)
     d = v[1:2:end] .* D.^2 # diag of D .* H .* D'
     @assert length(v) ≥ 2n-1
     reltol = maximum(abs,d)*eps(T)*log(n)
@@ -127,6 +127,7 @@ function hankel_partialchol(v::Vector, D::AbstractVector)
         end
         r += 1
     end
+    r == 100 && error("ranks more than 100 not yet supported")
     for k=1:length(σ) rmul!(view(C,:,k), sqrt(σ[k])) end
     C[:,1:r]
 end

From 9f808caad4feb11f3f64597187e6312eb9068047 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 1 Apr 2023 21:36:37 +0100
Subject: [PATCH 137/222] CompatHelper: bump compat for FillArrays to 1, (keep
 existing compat) (#215)

Co-authored-by: CompatHelper Julia <compathelper_noreply@julialang.org>
---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 87c26d69..65cff641 100644
--- a/Project.toml
+++ b/Project.toml
@@ -21,7 +21,7 @@ AbstractFFTs = "1.0"
 FFTW = "1.6"
 FastGaussQuadrature = "0.4, 0.5"
 FastTransforms_jll = "0.6.2"
-FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
+FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1"
 GenericFFT = "0.1"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"

From 5369e423a6fcca991c2d314c596042eeb356ced9 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Sat, 1 Apr 2023 21:37:00 +0100
Subject: [PATCH 138/222] v0.15.2

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 65cff641..ce5ab327 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.1"
+version = "0.15.2"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From 14d2bbbcf3478d955bea0bc42a3edeff9385da08 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Tue, 4 Apr 2023 22:02:05 +0100
Subject: [PATCH 139/222] allow starting forwardrecurrence! from a shifted
 start for adaptive (#216)

* allow starting forwardrecurrence! from a shifted start for adaptive

* Update clenshaw.jl
---
 Project.toml    |  2 +-
 src/clenshaw.jl | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index ce5ab327..2b22f02f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.2"
+version = "0.15.3"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/clenshaw.jl b/src/clenshaw.jl
index a3e32452..24a1ee59 100644
--- a/src/clenshaw.jl
+++ b/src/clenshaw.jl
@@ -29,7 +29,14 @@ function _forwardrecurrence!(v::AbstractVector, A::AbstractVector, B::AbstractVe
     v[1] = p0
     N == 1 && return v
     v[2] = p1
-    @inbounds for n = 2:N-1
+    _forwardrecurrence!(v, A, B, C, x, 2:N)
+end
+
+function _forwardrecurrence!(v::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x, kr::AbstractUnitRange)
+    n₀, N = first(kr), last(kr)
+    @boundscheck N > length(v) && throw(BoundsError(v, N))
+    p0, p1 = v[n₀-1], v[n₀]
+    @inbounds for n = n₀:N-1
         p1,p0 = _forwardrecurrence_next(n, A, B, C, x, p0, p1),p1
         v[n+1] = p1
     end
@@ -38,6 +45,7 @@ end
 
 
 
+
 forwardrecurrence(N::Integer, A::AbstractVector, B::AbstractVector, C::AbstractVector, x) =
     forwardrecurrence!(Vector{promote_type(eltype(A),eltype(B),eltype(C),typeof(x))}(undef, N), A, B, C, x)
 

From 8d9214403a3a78027cf666143605d59f89198087 Mon Sep 17 00:00:00 2001
From: Claire Foster <chris42f@gmail.com>
Date: Tue, 6 Jun 2023 23:20:35 +1000
Subject: [PATCH 140/222] Fix weird syntax in annulus example (#218)

---
 examples/annulus.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/annulus.jl b/examples/annulus.jl
index 73c19f40..a6d7445e 100644
--- a/examples/annulus.jl
+++ b/examples/annulus.jl
@@ -39,7 +39,7 @@ M = 4N-3
 ρ = 2/3
 
 # The radial grid:
-r = [begin t = (N-n-0.5)/(2N); ct = sinpi(t); st = cospi(t); sqrt(ct^2+ρ^2*st^2) end; for n in 0:N-1]
+r = [begin t = (N-n-0.5)/(2N); ct = sinpi(t); st = cospi(t); sqrt(ct^2+ρ^2*st^2) end for n in 0:N-1]
 
 # The angular grid (mod $\pi$):
 θ = (0:M-1)*2/M

From 0e5214fa3855bb7a00b2ee93a37407ae2926ae7d Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Wed, 14 Jun 2023 02:45:49 +0530
Subject: [PATCH 141/222] Limit FFTW compat to v1.6 (#220)

* Limit FFTW compat to v1.6

* fix fftw to v1.6
---
 .github/workflows/ci.yml | 1 -
 Project.toml             | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7c0aba73..bd93db0d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,7 +11,6 @@ jobs:
         version:
           - '1.7'
           - '1'
-          - '^1.9.0-0'
         os:
           - ubuntu-latest
           - macOS-latest
diff --git a/Project.toml b/Project.toml
index 2b22f02f..de834436 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.3"
+version = "0.15.4"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -18,7 +18,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "1.0"
-FFTW = "1.6"
+FFTW = "~1.6"
 FastGaussQuadrature = "0.4, 0.5"
 FastTransforms_jll = "0.6.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1"

From acaf4a1ae59c925781d18b3f64283eedfe2bfc3f Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Thu, 22 Jun 2023 23:49:03 +0530
Subject: [PATCH 142/222] Set FFTW lower compat bound to v1.7 (#221)

* Compatibility with FFTW v1.7

* remove explicit CI test on v1.9

* bump version to v0.15.5

* Add inplace chebyshevtransform tests for small vec

* tests for ChebyshevU
---
 Project.toml              |  4 ++--
 src/chebyshevtransform.jl | 18 ++++++++----------
 test/chebyshevtests.jl    | 30 ++++++++++++++++++++++--------
 3 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/Project.toml b/Project.toml
index de834436..9feabc65 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.4"
+version = "0.15.5"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -18,7 +18,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "1.0"
-FFTW = "~1.6"
+FFTW = "1.7"
 FastGaussQuadrature = "0.4, 0.5"
 FastTransforms_jll = "0.6.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1"
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index c77941d8..11e07010 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -19,11 +19,9 @@ ChebyshevTransformPlan{T,kind}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T
     ChebyshevTransformPlan{T,kind,K,inplace,N,R}(plan)
 
 # jump through some hoops to make inferrable
-@inline kindtuple(N) = NTuple{N,Int32}
-@inline kindtuple(N,region...) = Vector{Int32}
 function plan_chebyshevtransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevTransformPlan{T,1,kindtuple(N,dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevTransformPlan{T,1}(FFTW.plan_r2r!(x, FIRSTKIND, dims...; kws...))
     end
@@ -36,7 +34,7 @@ end
 
 function plan_chebyshevtransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevTransformPlan{T,1,kindtuple(N,dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevTransformPlan{T,1}(FFTW.plan_r2r(x, FIRSTKIND, dims...; kws...))
     end
@@ -250,7 +248,7 @@ inv(P::IChebyshevTransformPlan{T,1}) where {T} = ChebyshevTransformPlan{T,1}(inv
 
 function plan_ichebyshevtransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevTransformPlan{T,1,kindtuple(N,dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevTransformPlan{T,1}(FFTW.plan_r2r!(x, IFIRSTKIND, dims...; kws...))
     end
@@ -262,7 +260,7 @@ end
 
 function plan_ichebyshevtransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevTransformPlan{T,1,kindtuple(N,dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevTransformPlan{T,1}(FFTW.plan_r2r(x, IFIRSTKIND, dims...; kws...))
     end
@@ -390,7 +388,7 @@ ChebyshevUTransformPlan{T,kind}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {
 
 function plan_chebyshevutransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevUTransformPlan{T,1,kindtuple(N,dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevUTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevUTransformPlan{T,1}(FFTW.plan_r2r!(x, UFIRSTKIND, dims...; kws...))
     end
@@ -402,7 +400,7 @@ end
 
 function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevUTransformPlan{T,1,kindtuple(N,dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevUTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevUTransformPlan{T,1}(FFTW.plan_r2r(x, UFIRSTKIND, dims...; kws...))
     end
@@ -511,7 +509,7 @@ IChebyshevUTransformPlan{T,kind}(F::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,
 
 function plan_ichebyshevutransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevUTransformPlan{T,1,kindtuple(N,dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevUTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevUTransformPlan{T,1}(FFTW.plan_r2r!(x, IUFIRSTKIND, dims...; kws...))
     end
@@ -523,7 +521,7 @@ end
 
 function plan_ichebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevUTransformPlan{T,1,kindtuple(N,dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevUTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevUTransformPlan{T,1}(FFTW.plan_r2r(x, IUFIRSTKIND, dims...; kws...))
     end
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index ae438ac0..696c13f0 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -69,10 +69,17 @@ using FastTransforms, Test
             @test g ≈ fcopy
             @test_throws ArgumentError Pi * T[1,2]
 
-            @test chebyshevtransform(T[1]) == T[1]
-            @test ichebyshevtransform(T[1]) == T[1]
-            @test chebyshevtransform(T[]) == T[]
-            @test ichebyshevtransform(T[]) == T[]
+            v = T[1]
+            @test chebyshevtransform(v) == v
+            @test ichebyshevtransform(v) == v
+            @test chebyshevtransform!(v) === v
+            @test ichebyshevtransform!(v) === v
+
+            v = T[]
+            @test chebyshevtransform(v) == v
+            @test ichebyshevtransform(v) == v
+            @test chebyshevtransform!(v) === v
+            @test ichebyshevtransform!(v) === v
         end
     end
     @testset "Chebyshev second kind points <-> first kind coefficients" begin
@@ -192,10 +199,17 @@ using FastTransforms, Test
             @test g ≈ fcopy
             @test_throws ArgumentError Pi * T[1,2]
 
-            @test chebyshevutransform(T[1]) == T[1]
-            @test ichebyshevutransform(T[1]) == T[1]
-            @test chebyshevutransform(T[]) == T[]
-            @test ichebyshevutransform(T[]) == T[]
+            v = T[1]
+            @test chebyshevutransform(v) == v
+            @test ichebyshevutransform(v) == v
+            @test chebyshevutransform!(v) === v
+            @test ichebyshevutransform!(v) === v
+
+            v = T[]
+            @test chebyshevutransform(v) == v
+            @test ichebyshevutransform(v) == v
+            @test chebyshevutransform!(v) === v
+            @test ichebyshevutransform!(v) === v
         end
     end
     @testset "Chebyshev second kind points <-> second kind coefficients" begin

From b44924e323bac83e11d0e6b61cc21384f443e3b5 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Tue, 11 Jul 2023 14:39:25 +0530
Subject: [PATCH 143/222] separate out windows CI tasks (#225)

* separate out windows CI tasks

* use if conditional

* revert if conditional

* split windows tests and docs to files
---
 .github/workflows/CIWindows.yml | 40 +++++++++++++++++++++++++++++++++
 .github/workflows/ci.yml        | 24 +++-----------------
 .github/workflows/docs.yml      | 22 ++++++++++++++++++
 3 files changed, 65 insertions(+), 21 deletions(-)
 create mode 100644 .github/workflows/CIWindows.yml
 create mode 100644 .github/workflows/docs.yml

diff --git a/.github/workflows/CIWindows.yml b/.github/workflows/CIWindows.yml
new file mode 100644
index 00000000..b0657745
--- /dev/null
+++ b/.github/workflows/CIWindows.yml
@@ -0,0 +1,40 @@
+name: CI Windows
+on:
+  - push
+  - pull_request
+jobs:
+  testwindows:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        version:
+          - '1'
+        os:
+          - windows-latest
+        arch:
+          - x86
+          - x64
+    steps:
+      - uses: actions/checkout@v3
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+          show-versioninfo: true
+      - uses: actions/cache@v3
+        env:
+          cache-name: cache-artifacts
+        with:
+          path: ~/.julia/artifacts
+          key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
+          restore-keys: |
+            ${{ runner.os }}-test-${{ env.cache-name }}-
+            ${{ runner.os }}-test-
+            ${{ runner.os }}-
+      - uses: julia-actions/julia-buildpkg@latest
+      - uses: julia-actions/julia-runtest@latest
+      - uses: julia-actions/julia-processcoverage@v1
+      - uses: codecov/codecov-action@v3
+        with:
+          file: lcov.info
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bd93db0d..f926658f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -14,7 +14,6 @@ jobs:
         os:
           - ubuntu-latest
           - macOS-latest
-          - windows-latest
         arch:
           - x86
           - x64
@@ -22,13 +21,13 @@ jobs:
           - os: macOS-latest
             arch: x86
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - uses: julia-actions/setup-julia@v1
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
           show-versioninfo: true
-      - uses: actions/cache@v1
+      - uses: actions/cache@v3
         env:
           cache-name: cache-artifacts
         with:
@@ -41,23 +40,6 @@ jobs:
       - uses: julia-actions/julia-buildpkg@latest
       - uses: julia-actions/julia-runtest@latest
       - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v1
+      - uses: codecov/codecov-action@v3
         with:
           file: lcov.info
-  docs:
-    name: Documentation
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - uses: julia-actions/setup-julia@v1
-        with:
-          version: '1.7'
-      - run: |
-          julia --project=docs -e '
-            using Pkg
-            Pkg.develop(PackageSpec(path=pwd()))
-            Pkg.instantiate()'
-      - run: julia --project=docs docs/make.jl
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 00000000..ac73ac6a
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,22 @@
+name: Documentation
+on:
+  - push
+  - pull_request
+jobs:
+  docs:
+    name: Documentation
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: julia-actions/setup-julia@v1
+        with:
+          version: '1'
+      - run: |
+          julia --project=docs -e '
+            using Pkg
+            Pkg.develop(PackageSpec(path=pwd()))
+            Pkg.instantiate()'
+      - run: julia --project=docs docs/make.jl
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}

From 81376fe18e81340befc8643d897b127ee9e86871 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 12 Jul 2023 11:33:14 -0500
Subject: [PATCH 144/222] update references and docs yaml

---
 .github/workflows/docs.yml |  7 +------
 README.md                  | 12 +++++++-----
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index ac73ac6a..c4f06471 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -11,12 +11,7 @@ jobs:
       - uses: julia-actions/setup-julia@v1
         with:
           version: '1'
-      - run: |
-          julia --project=docs -e '
-            using Pkg
-            Pkg.develop(PackageSpec(path=pwd()))
-            Pkg.instantiate()'
-      - run: julia --project=docs docs/make.jl
+      - uses: julia-actions/julia-docdeploy@releases/v1
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/README.md b/README.md
index f2d74a2e..4f736a89 100644
--- a/README.md
+++ b/README.md
@@ -157,12 +157,14 @@ julia> @time norm(ipaduatransform(paduatransform(v)) - v)/norm(v)
 
 ```
 
-# References:
+# References
 
-   [1]  D. Ruiz—Antolín and A. Townsend. <a href="https://doi.org/10.1137/17M1134822">A nonuniform fast Fourier transform based on low rank approximation</a>, *SIAM J. Sci. Comput.*, **40**:A529–A547, 2018.
+[1]  D. Ruiz—Antolín and A. Townsend, [A nonuniform fast Fourier transform based on low rank approximation](https://doi.org/10.1137/17M1134822), *SIAM J. Sci. Comput.*, **40**:A529–A547, 2018.
 
-   [2] S. Olver, R. M. Slevinsky, and A. Townsend. <a href="https://doi.org/10.1017/S0962492920000045">Fast algorithms using orthogonal polynomials</a>, *Acta Numerica*, **29**:573—699, 2020.
+[2] T. S. Gutleb, S. Olver and R. M. Slevinsky, [Polynomial and rational measure modifications of orthogonal polynomials via infinite-dimensional banded matrix factorizations](https://arxiv.org/abs/2302.08448), arXiv:2023.08448, 2023.
 
-   [3]  R. M. Slevinsky. <a href="https://doi.org/10.1016/j.acha.2017.11.001">Fast and backward stable transforms between spherical harmonic expansions and bivariate Fourier series</a>, *Appl. Comput. Harmon. Anal.*, **47**:585—606, 2019.
+[3] S. Olver, R. M. Slevinsky, and A. Townsend, [Fast algorithms using orthogonal polynomials](https://doi.org/10.1017/S0962492920000045), *Acta Numerica*, **29**:573—699, 2020.
 
-   [4]  R. M. Slevinsky, <a href="https://arxiv.org/abs/1711.07866">Conquering the pre-computation in two-dimensional harmonic polynomial transforms</a>, arXiv:1711.07866, 2017.
+[4]  R. M. Slevinsky, [Fast and backward stable transforms between spherical harmonic expansions and bivariate Fourier series](https://doi.org/10.1016/j.acha.2017.11.001), *Appl. Comput. Harmon. Anal.*, **47**:585—606, 2019.
+
+[5]  R. M. Slevinsky, [Conquering the pre-computation in two-dimensional harmonic polynomial transforms](https://arxiv.org/abs/1711.07866), arXiv:1711.07866, 2017.

From b61ce1883031ba6881d2b0501162fa3910e04592 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 13 Jul 2023 09:48:00 -0500
Subject: [PATCH 145/222] reduce tagbot runs

---
 .github/workflows/TagBot.yml | 8 ++++++--
 Project.toml                 | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
index d77d3a0c..f49313b6 100644
--- a/.github/workflows/TagBot.yml
+++ b/.github/workflows/TagBot.yml
@@ -1,11 +1,15 @@
 name: TagBot
 on:
-  schedule:
-    - cron: 0 * * * *
+  issue_comment:
+    types:
+      - created
+  workflow_dispatch:
 jobs:
   TagBot:
+    if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
     runs-on: ubuntu-latest
     steps:
       - uses: JuliaRegistries/TagBot@v1
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
+          ssh: ${{ secrets.DOCUMENTER_KEY }}
diff --git a/Project.toml b/Project.toml
index 9feabc65..34c7af00 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.5"
+version = "0.15.6"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From c2eb7fd4710dd73f1b5946e8135e3931dfc86e47 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 13 Jul 2023 10:36:00 -0500
Subject: [PATCH 146/222] link back to the repository from docs

---
 docs/src/dev.md   | 2 +-
 docs/src/index.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/src/dev.md b/docs/src/dev.md
index d1aadc1e..80971817 100644
--- a/docs/src/dev.md
+++ b/docs/src/dev.md
@@ -1,6 +1,6 @@
 # Development Documentation
 
-The core of `FastTransforms.jl` is developed in parallel with the [C library](https://github.com/MikaelSlevinsky/FastTransforms) of the same name. Julia and C interoperability is enhanced by the [BinaryBuilder](https://github.com/JuliaPackaging/BinaryBuilder.jl) infrastructure, which provides the user a safe and seamless experience using a package in a different language.
+The core of [`FastTransforms.jl`](https://github.com/JuliaApproximation/FastTransforms.jl) is developed in parallel with the [C library](https://github.com/MikaelSlevinsky/FastTransforms) of the same name. Julia and C interoperability is enhanced by the [BinaryBuilder](https://github.com/JuliaPackaging/BinaryBuilder.jl) infrastructure, which provides the user a safe and seamless experience using a package in a different language.
 
 ## Why two packages?
 
diff --git a/docs/src/index.md b/docs/src/index.md
index 39fc3a4b..fb37e2ef 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -2,7 +2,7 @@
 
 ## Introduction
 
-`FastTransforms.jl` allows the user to conveniently work with orthogonal polynomials with degrees well into the millions.
+[`FastTransforms.jl`](https://github.com/JuliaApproximation/FastTransforms.jl) allows the user to conveniently work with orthogonal polynomials with degrees well into the millions.
 
 This package provides a Julia wrapper for the [C library](https://github.com/MikaelSlevinsky/FastTransforms) of the same name. Additionally, all three types of nonuniform fast Fourier transforms available, as well as the Padua transform.
 

From 490ce9a7b521849dedec8efbafb7c74462a5be31 Mon Sep 17 00:00:00 2001
From: Mikael Slevinsky <Richard.Slevinsky@umanitoba.ca>
Date: Mon, 24 Jul 2023 11:23:57 -0600
Subject: [PATCH 147/222] fix arxiv reference

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4f736a89..ac08bd9e 100644
--- a/README.md
+++ b/README.md
@@ -161,7 +161,7 @@ julia> @time norm(ipaduatransform(paduatransform(v)) - v)/norm(v)
 
 [1]  D. Ruiz—Antolín and A. Townsend, [A nonuniform fast Fourier transform based on low rank approximation](https://doi.org/10.1137/17M1134822), *SIAM J. Sci. Comput.*, **40**:A529–A547, 2018.
 
-[2] T. S. Gutleb, S. Olver and R. M. Slevinsky, [Polynomial and rational measure modifications of orthogonal polynomials via infinite-dimensional banded matrix factorizations](https://arxiv.org/abs/2302.08448), arXiv:2023.08448, 2023.
+[2] T. S. Gutleb, S. Olver and R. M. Slevinsky, [Polynomial and rational measure modifications of orthogonal polynomials via infinite-dimensional banded matrix factorizations](https://arxiv.org/abs/2302.08448), arXiv:2302.08448, 2023.
 
 [3] S. Olver, R. M. Slevinsky, and A. Townsend, [Fast algorithms using orthogonal polynomials](https://doi.org/10.1017/S0962492920000045), *Acta Numerica*, **29**:573—699, 2020.
 

From fde025f2fe3643e8673a098fed02bc0d804dc7ed Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Fri, 4 Aug 2023 13:28:21 +0400
Subject: [PATCH 148/222] Split y coordinate computation in `paduapoints`
 (#226)

* split y coordinate computation in paduapoints

* Add inbounds and tests

* Bump version to v0.15.7
---
 Project.toml          |  2 +-
 src/PaduaTransform.jl | 41 +++++++++++++++++++++++++++++++++--------
 test/paduatests.jl    | 13 +++++++++++++
 3 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/Project.toml b/Project.toml
index 34c7af00..00bb0171 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.6"
+version = "0.15.7"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/PaduaTransform.jl b/src/PaduaTransform.jl
index 6188e74d..c7670924 100644
--- a/src/PaduaTransform.jl
+++ b/src/PaduaTransform.jl
@@ -209,21 +209,46 @@ function paduapoints(::Type{T}, n::Integer) where T
     MM=Matrix{T}(undef,N,2)
     m=0
     delta=0
-    NN=fld(n+2,2)
-    @inbounds for k=n:-1:0
-        if isodd(n)>0
-            delta=mod(k,2)
+    NN=div(n,2)+1
+    # x coordinates
+    for k=n:-1:0
+        if isodd(n)
+            delta = Int(isodd(k))
         end
+        x = -cospi(T(k)/n)
         @inbounds for j=NN+delta:-1:1
             m+=1
-            MM[m,1]=sinpi(T(k)/n-T(0.5))
-            if isodd(n-k)>0
-                MM[m,2]=sinpi((2j-one(T))/(n+1)-T(0.5))
+            MM[m,1]=x
+        end
+    end
+    # y coordinates
+    # populate the first two sets, and copy the rest
+    m=0
+    for k=n:-1:n-1
+        if isodd(n)
+            delta = Int(isodd(k))
+        end
+        for j=NN+delta:-1:1
+            m+=1
+            @inbounds if isodd(n-k)
+                MM[m,2]=-cospi((2j-one(T))/(n+1))
             else
-                MM[m,2]=sinpi(T(2j-2)/(n+1)-T(0.5))
+                MM[m,2]=-cospi(T(2j-2)/(n+1))
             end
         end
     end
+    m += 1
+    # number of y coordinates between k=n and k=n-2
+    Ny_shift = 2NN+isodd(n)
+    for k in n-2:-1:0
+        if isodd(n)
+            delta = Int(isodd(k))
+        end
+        for j in range(m, length=NN+delta)
+            @inbounds MM[j,2] = MM[j-Ny_shift,2]
+        end
+        m += NN+delta
+    end
     return MM
 end
 
diff --git a/test/paduatests.jl b/test/paduatests.jl
index cc46d462..c82dc579 100644
--- a/test/paduatests.jl
+++ b/test/paduatests.jl
@@ -53,4 +53,17 @@ using FastTransforms, Test
     g_l=paduaeval(g_xy,x,y,l,Val{false})
     @test f_xy(x,y) ≈ f_m
     @test g_xy(x,y) ≈ g_l
+
+    # odd n
+    m=135
+    l=85
+    f_m=paduaeval(f_xy,x,y,m,Val{true})
+    g_l=paduaeval(g_xy,x,y,l,Val{true})
+    @test f_xy(x,y) ≈ f_m
+    @test g_xy(x,y) ≈ g_l
+
+    f_m=paduaeval(f_xy,x,y,m,Val{false})
+    g_l=paduaeval(g_xy,x,y,l,Val{false})
+    @test f_xy(x,y) ≈ f_m
+    @test g_xy(x,y) ≈ g_l
 end

From 6fd000cf130b624b9a65a55127dafd03fe7adf37 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
 <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 3 Oct 2023 11:34:41 +0100
Subject: [PATCH 149/222] CompatHelper: bump compat for FastGaussQuadrature to
 1, (keep existing compat) (#227)

* CompatHelper: bump compat for FastGaussQuadrature to 1, (keep existing compat)

* v0.15.8

---------

Co-authored-by: CompatHelper Julia <compathelper_noreply@julialang.org>
Co-authored-by: Sheehan Olver <solver@mac.com>
---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 00bb0171..581a185c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.7"
+version = "0.15.8"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -19,7 +19,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 [compat]
 AbstractFFTs = "1.0"
 FFTW = "1.7"
-FastGaussQuadrature = "0.4, 0.5"
+FastGaussQuadrature = "0.4, 0.5, 1"
 FastTransforms_jll = "0.6.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1"
 GenericFFT = "0.1"

From 50282780c8a377ae700109a5e76df7e81b3c14f1 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Sat, 21 Oct 2023 17:36:24 +0100
Subject: [PATCH 150/222] matrix th_ultra2ultra, th_jac2jac (#228)

* matrix th_ultra2ultra

* leg2chebu tests

* jac2jac matrix

* support composing plans in Toeplitz-Hankel

* Make type stable via vector plans

* prepare for integer increments

* integer increments with vectors

* fix complexity of integer increment

* JacJacPlan remembers dims

* matrix raising

* Fix Chebyshev transform

* higher th_ultra2ultra

* no-alloc jacobi integer step

* fix tests

* fix nearest integer
---
 Project.toml                |   2 +-
 src/toeplitzhankel.jl       | 400 ++++++++++++++++++++++++++++++++++--
 test/toeplitzhankeltests.jl |  74 ++++++-
 3 files changed, 455 insertions(+), 21 deletions(-)

diff --git a/Project.toml b/Project.toml
index 581a185c..8a132703 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.8"
+version = "0.15.9"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index ff9acc52..eb11994e 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -1,7 +1,20 @@
 """
-Store a diagonally-scaled Toeplitz∘Hankel matrix:
+Represent a scaled Toeplitz∘Hankel matrix:
+
     DL(T∘H)DR
-where the Hankel matrix `H` is non-negative definite. This allows a Cholesky decomposition in 𝒪(K²N) operations and 𝒪(KN) storage, K = log N log ɛ⁻¹.
+
+where the Hankel matrix `H` is non-negative definite, via
+
+    ∑_{k=1}^r Diagonal(L[:,k])*T*Diagonal(R[:,k])
+
+where `L` and `R` are determined by doing a rank-r pivoted Cholesky decomposition of `H`, which in low rank form is
+
+    H ≈ ∑_{k=1}^r C[:,k]C[:,k]'
+
+so that `L[:,k] = DL*C[:,k]` and `R[:,k] = DR*C[:,k]`.
+
+This allows a Cholesky decomposition in 𝒪(K²N) operations and 𝒪(KN) storage, K = log N log ɛ⁻¹.
+The tuple storage allows plans applied to each dimension.
 """
 struct ToeplitzHankelPlan{S, N, M, N1, TP<:ToeplitzPlan{S,N1}} <: Plan{S}
     T::NTuple{M,TP}
@@ -209,7 +222,7 @@ function _leg2chebuTH_TLC(::Type{S}, mn, d) where {S}
     t[1:2:end] = λ[1:2:n]./(((1:2:n).-2))
     h = λ./((1:2n-1).+1)
     C = hankel_partialchol(h)
-    T = plan_uppertoeplitz!(-2t/π, (length(t), size(C,2)), 1)
+    T = plan_uppertoeplitz!(-2t/π, (mn..., size(C,2)), d)
     (T, (1:n) .* C, C)
 end
 
@@ -229,6 +242,10 @@ for f in (:leg2cheb, :leg2chebu)
     end
 end
 
+###
+# th_cheb2leg
+###
+
 _sub_dim_by_one(d) = ()
 _sub_dim_by_one(d, m, n...) = (isone(d) ? m-1 : m, _sub_dim_by_one(d-1, n...)...)
 
@@ -257,7 +274,38 @@ function plan_th_cheb2leg!(::Type{S}, mn::NTuple{2,Int}, dims::NTuple{2,Int}) wh
     ChebyshevToLegendrePlanTH(ToeplitzHankelPlan((T1,T2), (L1,L2), (C1,C2), dims))
 end
 
-function plan_th_ultra2ultra!(::Type{S}, (n,)::Tuple{Int}, λ₁, λ₂) where {S}
+
+###
+# th_ultra2ultra
+###
+
+# The second case handles zero
+isapproxinteger(::Integer) = true
+isapproxinteger(x) = isinteger(x) || x ≈ round(Int,x)  || x+1 ≈ round(Int,x+1)
+
+
+_nearest_jacobi_par(α, γ) = isapproxinteger(α-γ) ? α : round(Int,α,RoundDown) + mod(γ,1)
+
+
+struct Ultra2UltraPlanTH{T, Plans, Dims} <: Plan{T}
+    plans::Plans
+    λ₁::T
+    λ₂::T
+    dims::Dims
+end
+
+function *(P::Ultra2UltraPlanTH, A::AbstractArray)
+    ret = A
+    for p in P.plans
+        ret = p*ret
+    end
+    c = _nearest_jacobi_par(P.λ₁, P.λ₂)
+
+    _ultra2ultra_integerinc!(ret, c, P.λ₂, P.dims)
+end
+
+function _ultra2ultraTH_TLC(::Type{S}, mn, λ₁, λ₂, d) where {S}
+    n = mn[d]
     @assert abs(λ₁-λ₂) < 1
     S̃ = real(S)
     DL = (zero(S̃):n-one(S̃)) .+ λ₂
@@ -267,8 +315,151 @@ function plan_th_ultra2ultra!(::Type{S}, (n,)::Tuple{Int}, λ₁, λ₂) where {
     h = Λ.(jk,λ₁,λ₂+one(S̃))
     lmul!(gamma(λ₂)/gamma(λ₁),h)
     C = hankel_partialchol(h)
-    T = plan_uppertoeplitz!(lmul!(inv(gamma(λ₁-λ₂)),t), (length(t), size(C,2)), 1)
-    ToeplitzHankelPlan(T, DL .* C, C)
+    T = plan_uppertoeplitz!(lmul!(inv(gamma(λ₁-λ₂)),t), (mn..., size(C,2)), d)
+    T, DL .* C, C
+end
+
+_good_plan_th_ultra2ultra!(::Type{S}, mn, λ₁, λ₂, dims::Int) where S = ToeplitzHankelPlan(_ultra2ultraTH_TLC(S, mn, λ₁, λ₂, dims)..., dims)
+
+function _good_plan_th_ultra2ultra!(::Type{S}, mn::NTuple{2,Int}, λ₁, λ₂, dims::NTuple{2,Int}) where S
+    T1,L1,C1 = _ultra2ultraTH_TLC(S, mn, λ₁, λ₂, 1)
+    T2,L2,C2 = _ultra2ultraTH_TLC(S, mn, λ₁, λ₂, 2)
+    ToeplitzHankelPlan((T1,T2), (L1,L2), (C1,C2), dims)
+end
+
+
+
+function plan_th_ultra2ultra!(::Type{S}, mn, λ₁, λ₂, dims) where {S}
+    c = _nearest_jacobi_par(λ₁, λ₂)
+
+    if isapproxinteger(λ₂ - λ₁)
+        # TODO: don't make extra plan
+        plans = typeof(_good_plan_th_ultra2ultra!(S, mn, λ₁+0.1, λ₂, dims))[]
+    else
+        plans = [_good_plan_th_ultra2ultra!(S, mn, λ₁, c, dims)]
+    end
+
+    Ultra2UltraPlanTH(plans, λ₁, λ₂, dims)
+end
+
+function _ultra_raise!(B, λ)
+    m, n = size(B, 1), size(B, 2)
+
+    if m > 1
+        @inbounds for j = 1:n
+            for i = 1:m-2
+                Bij = λ / (i+λ-1) * B[i,j]
+                Bij += -λ / (i+λ+1) * B[i+2,j]
+                B[i,j] = Bij
+            end
+            B[m-1,j] = λ / (m+λ-2)*B[m-1,j]
+            B[m,j] = λ / (m+λ-1)*B[m,j]
+        end
+    end
+    B
+end
+
+function _ultra_lower!(B, λ)
+    m, n = size(B, 1), size(B, 2)
+
+    if m > 1
+        @inbounds for j = 1:n
+            B[m,j] = (m+λ-1)/λ * B[m,j]
+            B[m-1,j] = (m+λ-2)/λ *B[m-1,j]
+            for i = m-2:-1:1
+                Bij = B[i,j] + λ / (i+λ+1) * B[i+2,j]
+                B[i,j] = (i+λ-1)/λ * Bij
+            end  
+        end
+    end
+    B
+end
+
+
+
+function _ultra_raise!(x, λ, dims)
+    for d in dims
+        if d == 1
+            _ultra_raise!(x, λ)
+        else
+            _ultra_raise!(x', λ)
+        end
+    end
+    x
+end
+
+function _ultra_lower!(x, λ, dims)
+    for d in dims
+        if d == 1
+            _ultra_lower!(x, λ-1)
+        else
+            _ultra_lower!(x', λ-1)
+        end
+    end
+    x
+end
+
+function _ultra2ultra_integerinc!(x, λ₁, λ₂, dims)
+    while !(λ₁ ≈ λ₂)
+        if λ₂ > λ₁
+            _ultra_raise!(x, λ₁, dims)
+            λ₁ += 1
+        else
+            _ultra_lower!(x, λ₁, dims)
+            λ₁ -= 1
+        end
+    end
+    x
+end
+
+###
+# th_jac2jac
+###
+
+
+function _lmul!(A::Bidiagonal, B::AbstractVecOrMat)
+    @assert A.uplo == 'U'
+    
+    m, n = size(B, 1), size(B, 2)
+    if m != size(A, 1)
+        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
+    end
+    @inbounds for j = 1:n
+        for i = 1:m-1
+            Bij = A.dv[i]*B[i,j]
+            Bij += A.ev[i]*B[i+1,j]
+            B[i,j] = Bij
+        end
+        B[m,j] = A.dv[m]*B[m,j]
+    end
+    B
+end
+
+struct Jac2JacPlanTH{T, Plans, Dims} <: Plan{T}
+    plans::Plans
+    α::T
+    β::T
+    γ::T
+    δ::T
+    dims::Dims
+end
+
+Jac2JacPlanTH(plans, α, β, γ, δ, dims) = Jac2JacPlanTH(plans, promote(α, β, γ, δ)..., dims)
+
+function *(P::Jac2JacPlanTH, A::AbstractArray)
+    if P.α + P.β ≤ -1
+        _jacobi_raise_a!(A, P.α, P.β)
+        c,d = _nearest_jacobi_par(P.α+1, P.γ), _nearest_jacobi_par(P.β, P.δ)
+    else
+        c,d = _nearest_jacobi_par(P.α, P.γ), _nearest_jacobi_par(P.β, P.δ)
+    end
+
+    ret = A
+    for p in P.plans
+        ret = p*ret
+    end
+
+    _jac2jac_integerinc!(ret, c, d, P.γ, P.δ, P.dims)
 end
 
 function alternatesign!(v)
@@ -278,32 +469,206 @@ function alternatesign!(v)
     v
 end
 
-function plan_th_jac2jac!(::Type{S}, (n,), α, β, γ, δ) where {S}
+function _jac2jacTH_TLC(::Type{S}, mn, α, β, γ, δ, d) where {S}
+    n = mn[d]
+    @assert α+β > -1
     if β == δ
         @assert abs(α-γ) < 1
-        @assert α+β > -1
         jk = 0:n-1
         DL = (2jk .+ γ .+ β .+ 1).*Λ.(jk,γ+β+1,β+1)
         t = convert(AbstractVector{S}, Λ.(jk, α-γ,1))
         h = Λ.(0:2n-2,α+β+1,γ+β+2)
         DR = Λ.(jk,β+1,α+β+1)./gamma(α-γ)
         C = hankel_partialchol(h)
-        T = plan_uppertoeplitz!(t, (length(t), size(C,2)), 1)
+        T = plan_uppertoeplitz!(t, (mn..., size(C,2)), d)
     elseif α == γ
+        @assert abs(β-δ) < 1
         jk = 0:n-1
         DL = (2jk .+ δ .+ α .+ 1).*Λ.(jk,δ+α+1,α+1)
         h = Λ.(0:2n-2,α+β+1,δ+α+2)
         DR = Λ.(jk,α+1,α+β+1)./gamma(β-δ)
         t = alternatesign!(convert(AbstractVector{S}, Λ.(jk,β-δ,1)))
         C = hankel_partialchol(h)
-        T = plan_uppertoeplitz!(t, (length(t), size(C,2)), 1)
+        T = plan_uppertoeplitz!(t, (mn..., size(C,2)), d)
     else
         throw(ArgumentError("Cannot create Toeplitz dot Hankel, use a sequence of plans."))
     end
 
-    ToeplitzHankelPlan(T, DL .* C, DR .* C)
+    (T, DL .* C, DR .* C)
+end
+
+_good_plan_th_jac2jac!(::Type{S}, mn, α, β, γ, δ, dims::Int) where S = ToeplitzHankelPlan(_jac2jacTH_TLC(S, mn, α, β, γ, δ, dims)..., dims)
+
+function _good_plan_th_jac2jac!(::Type{S}, mn::NTuple{2,Int}, α, β, γ, δ, dims::NTuple{2,Int}) where S
+    T1,L1,C1 = _jac2jacTH_TLC(S, mn, α, β, γ, δ, 1)
+    T2,L2,C2 = _jac2jacTH_TLC(S, mn, α, β, γ, δ, 2)
+    ToeplitzHankelPlan((T1,T2), (L1,L2), (C1,C2), dims)
+end
+
+
+
+function plan_th_jac2jac!(::Type{S}, mn, α, β, γ, δ, dims) where {S}
+    if α + β ≤ -1
+        c,d = _nearest_jacobi_par(α+1, γ), _nearest_jacobi_par(β, δ)
+    else
+        c,d = _nearest_jacobi_par(α, γ), _nearest_jacobi_par(β, δ)
+    end
+
+    if isapproxinteger(β - δ) && isapproxinteger(α-γ)
+        # TODO: don't make extra plan
+        plans = typeof(_good_plan_th_jac2jac!(S, mn, α+0.1, β, α, β, dims))[]
+    elseif isapproxinteger(α - γ) || isapproxinteger(β - δ)
+        if α + β ≤ -1
+            # avoid degeneracies
+            plans = [_good_plan_th_jac2jac!(S, mn, α+1, β, c, d, dims)]
+        else
+            plans = [_good_plan_th_jac2jac!(S, mn, α, β, c, d, dims)]
+        end
+    else
+        if α + β ≤ -1
+            plans = [_good_plan_th_jac2jac!(S, mn, α+1, β, α+1, d, dims), _good_plan_th_jac2jac!(S, mn, α+1, d, c, d, dims)]
+        else
+            plans = [_good_plan_th_jac2jac!(S, mn, α, β, α, d, dims), _good_plan_th_jac2jac!(S, mn, α, d, c, d, dims)]
+        end
+    end
+
+    Jac2JacPlanTH(plans, α, β, γ, δ, dims)
+end
+
+
+function _jacobi_raise_a!(B, a, b)
+    m, n = size(B, 1), size(B, 2)
+    if m > 1
+        @inbounds for j = 1:n
+            B[1,j] = B[1,j] - (1+b) / (a+b+3) * B[2,j]
+            for i = 2:m-1
+                B[i,j] = (i+a+b)/(a+b-1+2i) * B[i,j] - (i+b) / (a+b+2i+1) * B[i+1,j]
+            end
+            B[m,j] = (m+a+b)/(a+b-1+2m)*B[m,j]
+        end
+    end
+    B
+end
+
+function _jacobi_lower_a!(B, a, b)
+    m, n = size(B, 1), size(B, 2)
+
+    if m > 1
+        @inbounds for j = 1:n
+            B[m,j] = (a+b-1+2m)/(m+a+b) * B[m,j]
+            for i = m-1:-1:2
+                Bij = B[i,j] + (i+b) / (a+b+2i+1) * B[i+1,j]
+                B[i,j] = (a+b-1+2i)/(i+a+b)  * Bij
+            end
+            B[1,j] = B[1,j] + (1+b) / (a+b+3) * B[2,j]
+        end
+    end
+    B
+end
+
+
+
+function _jacobi_raise_b!(B, a, b)
+    m, n = size(B, 1), size(B, 2)
+    if m > 1
+        @inbounds for j = 1:n
+            B[1,j] = B[1,j] + (1+a) / (a+b+3) * B[2,j]
+            
+            for i = 2:m-1
+                B[i,j] = (i+a+b)/(a+b-1+2i) * B[i,j] + (i+a) / (a+b+2i+1) * B[i+1,j]
+            end
+            B[m,j] = (m+a+b)/(a+b-1+2m)*B[m,j]
+        end
+    end
+    B
+end
+
+function _jacobi_lower_b!(B, a, b)
+    m, n = size(B, 1), size(B, 2)
+
+    if m > 1
+        @inbounds for j = 1:n
+            B[m,j] = (a+b-1+2m)/(m+a+b) * B[m,j]
+            for i = m-1:-1:2
+                Bij = B[i,j] - (i+a) / (a+b+2i+1) * B[i+1,j]
+                B[i,j] = (a+b-1+2i)/(i+a+b)  * Bij
+            end
+            B[1,j] = B[1,j] - (1+a) / (a+b+3) * B[2,j]
+        end
+    end
+    B
+end
+
+
+
+function _jacobi_raise_b!(x, α, β, dims)
+    for d in dims
+        if d == 1
+            _jacobi_raise_b!(x, α, β)
+        else
+            _jacobi_raise_b!(x', α, β)
+        end
+    end
+    x
+end
+function _jacobi_raise_a!(x, α, β, dims)
+    for d in dims
+        if d == 1
+            _jacobi_raise_a!(x, α, β)
+        else
+            _jacobi_raise_a!(x', α, β)
+        end
+    end
+    x
+end
+
+function _jacobi_lower_b!(x, α, β, dims)
+    for d in dims
+        if d == 1
+            _jacobi_lower_b!(x, α, β-1)
+        else
+            _jacobi_lower_b!(x', α, β-1)
+        end
+    end
+    x
+end
+function _jacobi_lower_a!(x, α, β, dims)
+    for d in dims
+        if d == 1
+            _jacobi_lower_a!(x, α-1, β)
+        else
+            _jacobi_lower_a!(x', α-1, β)
+        end
+    end
+    x
+end
+
+
+function _jac2jac_integerinc!(x, α, β, γ, δ, dims)
+    while !(α ≈ γ && β ≈ δ)
+        if !(δ ≈ β) && δ > β
+            _jacobi_raise_b!(x, α, β, dims)
+            β += 1
+        elseif !(δ ≈ β) && δ < β
+            _jacobi_lower_b!(x, α, β, dims)
+            β -= 1
+        elseif !(γ ≈ α) && γ > α
+            _jacobi_raise_a!(x, α, β, dims)
+            α += 1
+        else
+            @assert γ < α
+            _jacobi_lower_a!(x, α, β, dims)
+            α -= 1
+        end
+    end
+    x
 end
 
+
+###
+# other routines
+###
+
 for f in (:th_leg2cheb, :th_cheb2leg, :th_leg2chebu)
     plan = Symbol("plan_", f, "!")
     @eval begin
@@ -315,5 +680,14 @@ for f in (:th_leg2cheb, :th_cheb2leg, :th_leg2chebu)
     end
 end
 
-th_ultra2ultra(v, λ₁, λ₂, dims...) = plan_th_ultra2ultra!(eltype(v),size(v),λ₁,λ₂, dims...)*copy(v)
-th_jac2jac(v, α, β, γ, δ, dims...) = plan_th_jac2jac!(eltype(v),size(v),α,β,γ,δ, dims...)*copy(v)
\ No newline at end of file
+plan_th_ultra2ultra!(::Type{S}, mn::NTuple{N,Int}, λ₁, λ₂, dims::UnitRange) where {N,S} = plan_th_ultra2ultra!(S, mn, λ₁, λ₂, tuple(dims...))
+plan_th_ultra2ultra!(::Type{S}, mn::Tuple{Int}, λ₁, λ₂, dims::Tuple{Int}=(1,)) where {S} = plan_th_ultra2ultra!(S, mn, λ₁, λ₂, dims...)
+plan_th_ultra2ultra!(::Type{S}, (m,n)::NTuple{2,Int}, λ₁, λ₂) where {S} = plan_th_ultra2ultra!(S, (m,n), λ₁, λ₂, (1,2))
+plan_th_ultra2ultra!(arr::AbstractArray{T}, λ₁, λ₂, dims...) where T = plan_th_ultra2ultra!(T, size(arr), λ₁, λ₂, dims...)
+th_ultra2ultra(v, λ₁, λ₂, dims...) = plan_th_ultra2ultra!(eltype(v), size(v), λ₁, λ₂, dims...)*copy(v)
+
+plan_th_jac2jac!(::Type{S}, mn::NTuple{N,Int}, α, β, γ, δ, dims::UnitRange) where {N,S} = plan_th_jac2jac!(S, mn, α, β, γ, δ, tuple(dims...))
+plan_th_jac2jac!(::Type{S}, mn::Tuple{Int}, α, β, γ, δ, dims::Tuple{Int}=(1,)) where {S} = plan_th_jac2jac!(S, mn, α, β, γ, δ, dims...)
+plan_th_jac2jac!(::Type{S}, (m,n)::NTuple{2,Int}, α, β, γ, δ) where {S} = plan_th_jac2jac!(S, (m,n), α, β, γ, δ, (1,2))
+plan_th_jac2jac!(arr::AbstractArray{T}, α, β, γ, δ, dims...) where T = plan_th_jac2jac!(T, size(arr), α, β, γ, δ, dims...)
+th_jac2jac(v, α, β, γ, δ, dims...) = plan_th_jac2jac!(eltype(v), size(v), α, β, γ, δ, dims...)*copy(v)
\ No newline at end of file
diff --git a/test/toeplitzhankeltests.jl b/test/toeplitzhankeltests.jl
index a3f8d36c..ce1d2f69 100644
--- a/test/toeplitzhankeltests.jl
+++ b/test/toeplitzhankeltests.jl
@@ -1,7 +1,7 @@
 using FastTransforms, Test
-import FastTransforms: th_leg2cheb, th_cheb2leg, th_ultra2ultra,th_jac2jac, th_leg2chebu,
+import FastTransforms: th_leg2cheb, th_cheb2leg, th_leg2chebu, th_ultra2ultra,th_jac2jac, th_leg2chebu,
                         lib_leg2cheb, lib_cheb2leg, lib_ultra2ultra, lib_jac2jac,
-                        plan_th_cheb2leg!, plan_th_leg2cheb!
+                        plan_th_cheb2leg!, plan_th_leg2chebu!, plan_th_leg2cheb!, plan_th_ultra2ultra!, plan_th_jac2jac!
 
 @testset "ToeplitzHankel" begin
     for x in ([1.0], [1.0,2,3,4,5], [1.0+im,2-3im,3+4im,4-5im,5+10im], collect(1.0:1000))
@@ -9,17 +9,38 @@ import FastTransforms: th_leg2cheb, th_cheb2leg, th_ultra2ultra,th_jac2jac, th_l
         @test th_cheb2leg(x) ≈ lib_cheb2leg(x)
         @test th_leg2chebu(x) ≈ lib_ultra2ultra(x, 0.5, 1.0)
         @test th_ultra2ultra(x,0.1, 0.2) ≈ lib_ultra2ultra(x, 0.1, 0.2)
-        @test th_jac2jac(x,0.1, 0.2,0.1,0.4) ≈ lib_jac2jac(x, 0.1, 0.2,0.1,0.4)
+        @test th_ultra2ultra(x,1, 2) ≈ lib_ultra2ultra(x, 1, 2)
+        @test th_ultra2ultra(x,0.1, 2.2) ≈ lib_ultra2ultra(x, 0.1, 2.2)
+        @test th_ultra2ultra(x, 2.2, 0.1) ≈ lib_ultra2ultra(x, 2.2, 0.1)
+        @test @inferred(th_jac2jac(x,0.1, 0.2,0.1,0.4)) ≈ lib_jac2jac(x, 0.1, 0.2,0.1,0.4)
         @test th_jac2jac(x,0.1, 0.2,0.3,0.2) ≈ lib_jac2jac(x, 0.1, 0.2,0.3,0.2)
+        @test th_jac2jac(x,0.1, 0.2,0.3,0.4) ≈ lib_jac2jac(x, 0.1, 0.2,0.3,0.4)
+        @test @inferred(th_jac2jac(x,0.1, 0.2,1.3,0.4)) ≈ lib_jac2jac(x, 0.1, 0.2,1.3,0.4)
+        @test th_jac2jac(x,0.1, 0.2,1.3,2.4) ≈ lib_jac2jac(x, 0.1, 0.2,1.3,2.4)
+        @test th_jac2jac(x,1.3,2.4, 0.1, 0.2) ≈ lib_jac2jac(x,1.3,2.4, 0.1, 0.2)
+        @test th_jac2jac(x,1.3, 1.2,-0.1,-0.2) ≈ lib_jac2jac(x, 1.3, 1.2,-0.1,-0.2)
+        @test @inferred(th_jac2jac(x,-0.5, -0.5, -0.5,-0.5)) ≈ lib_jac2jac(x, -0.5, -0.5, -0.5,-0.5)
+        @test th_jac2jac(x,-0.5, -0.5, 0.5,0.5) ≈ lib_jac2jac(x, -0.5, -0.5, 0.5,0.5)
+        @test th_jac2jac(x,0.5,0.5,-0.5, -0.5) ≈ lib_jac2jac(x, 0.5,0.5,-0.5, -0.5)
+        @test th_jac2jac(x,-0.5, -0.5, 0.5,-0.5) ≈ lib_jac2jac(x, -0.5, -0.5, 0.5,-0.5)
+        @test th_jac2jac(x, -1/2,-1/2,1/2,0) ≈ lib_jac2jac(x, -1/2,-1/2,1/2,0)
+        @test th_jac2jac(x, -1/2,-1/2,0,1/2) ≈ lib_jac2jac(x, -1/2,-1/2,0,1/2)
+        @test th_jac2jac(x, -3/4,-3/4,0,3/4) ≈ lib_jac2jac(x, -3/4,-3/4,0,3/4)
+        @test th_jac2jac(x,0, 0, 5, 5) ≈ lib_jac2jac(x, 0, 0, 5, 5)
+        if length(x) < 10
+            @test th_jac2jac(x, 5, 5, 0, 0) ≈ lib_jac2jac(x,  5, 5, 0, 0)
+        end
 
-
-        @test th_cheb2leg(th_leg2cheb(x)) ≈ x atol=1E-9
-        @test th_leg2cheb(th_cheb2leg(x)) ≈ x atol=1E-10
+        @test th_cheb2leg(th_leg2cheb(x)) ≈ x
+        @test th_leg2cheb(th_cheb2leg(x)) ≈ x
+        @test th_ultra2ultra(th_ultra2ultra(x, 0.1, 0.6), 0.6, 0.1) ≈ x
+        @test th_jac2jac(th_jac2jac(x, 0.1, 0.6, 0.1, 0.8), 0.1, 0.8, 0.1, 0.6) ≈ x
+        @test th_jac2jac(th_jac2jac(x, 0.1, 0.6, 0.2, 0.8), 0.2, 0.8, 0.1, 0.6) ≈ x
     end
 
     for X in (randn(5,4), randn(5,4) + im*randn(5,4))
         @test th_leg2cheb(X, 1) ≈ hcat([leg2cheb(X[:,j]) for j=1:size(X,2)]...)
-        @test_broken th_leg2cheb(X, 1) ≈ leg2cheb(X, 1)
+        @test_broken th_leg2cheb(X, 1) ≈ leg2cheb(X, 1) # matrices not supported in FastTransforms
         @test th_leg2cheb(X, 2) ≈ vcat([permutedims(leg2cheb(X[k,:])) for k=1:size(X,1)]...)
         @test_broken th_leg2cheb(X, 2) ≈ leg2cheb(X, 2)
         @test th_leg2cheb(X) ≈ th_leg2cheb(th_leg2cheb(X, 1), 2)
@@ -33,6 +54,45 @@ import FastTransforms: th_leg2cheb, th_cheb2leg, th_ultra2ultra,th_jac2jac, th_l
         @test th_leg2cheb(X) == plan_th_leg2cheb!(X, 1:2)*copy(X)
 
         @test th_leg2cheb(th_cheb2leg(X)) ≈ X
+
+        @test th_leg2chebu(X, 1) ≈ hcat([ultra2ultra(X[:,j], 0.5, 1.0) for j=1:size(X,2)]...)
+        @test th_leg2chebu(X, 2) ≈ vcat([permutedims(ultra2ultra(X[k,:], 0.5, 1.0)) for k=1:size(X,1)]...)
+        @test th_leg2chebu(X) ≈ th_leg2chebu(th_leg2chebu(X, 1), 2)
+
+        @test th_leg2chebu(X) == plan_th_leg2chebu!(X, 1:2)*copy(X)
+
+        @test th_ultra2ultra(X, 0.1, 0.6, 1) ≈ hcat([ultra2ultra(X[:,j], 0.1, 0.6) for j=1:size(X,2)]...)
+        @test th_ultra2ultra(X, 0.1, 0.6, 2) ≈ vcat([permutedims(ultra2ultra(X[k,:], 0.1, 0.6)) for k=1:size(X,1)]...)
+        @test th_ultra2ultra(X, 0.1, 0.6) ≈ th_ultra2ultra(th_ultra2ultra(X, 0.1, 0.6, 1), 0.1, 0.6, 2)
+
+        @test th_ultra2ultra(X, 0.1, 2.6, 1) ≈ hcat([ultra2ultra(X[:,j], 0.1, 2.6) for j=1:size(X,2)]...)
+        @test th_ultra2ultra(X, 0.1, 2.6, 2) ≈ vcat([permutedims(ultra2ultra(X[k,:], 0.1, 2.6)) for k=1:size(X,1)]...)
+        @test th_ultra2ultra(X, 0.1, 2.6) ≈ th_ultra2ultra(th_ultra2ultra(X, 0.1, 2.6, 1), 0.1, 2.6, 2)
+
+        @test th_ultra2ultra(X, 2.6, 0.1, 1) ≈ hcat([ultra2ultra(X[:,j], 2.6, 0.1) for j=1:size(X,2)]...)
+        @test th_ultra2ultra(X, 2.6, 0.1, 2) ≈ vcat([permutedims(ultra2ultra(X[k,:], 2.6, 0.1)) for k=1:size(X,1)]...)
+        @test th_ultra2ultra(X, 2.6, 0.1) ≈ th_ultra2ultra(th_ultra2ultra(X, 2.6, 0.1, 1), 2.6, 0.1, 2)
+
+        @test th_ultra2ultra(X, 0.1, 0.6) == plan_th_ultra2ultra!(X, 0.1, 0.6, 1:2)*copy(X)
+        @test th_ultra2ultra(X, 0.1, 0.6) == plan_th_ultra2ultra!(X, 0.1, 0.6, 1:2)*copy(X)
+
+        @test th_ultra2ultra(th_ultra2ultra(X, 0.1, 0.6), 0.6, 0.1) ≈ X
+
+        @test th_jac2jac(X, 0.1, 0.6, 0.1, 0.8, 1) ≈ hcat([jac2jac(X[:,j], 0.1, 0.6, 0.1, 0.8) for j=1:size(X,2)]...)
+        @test th_jac2jac(X, 0.1, 0.6, 0.1, 0.8, 2) ≈ vcat([permutedims(jac2jac(X[k,:], 0.1, 0.6, 0.1, 0.8)) for k=1:size(X,1)]...)
+        @test th_jac2jac(X, 0.1, 0.6, 0.1, 0.8) ≈ th_jac2jac(th_jac2jac(X, 0.1, 0.6, 0.1, 0.8, 1), 0.1, 0.6, 0.1, 0.8, 2)
+
+        @test th_jac2jac(X, 0.1, 0.6, 0.2, 0.8, 1) ≈ hcat([jac2jac(X[:,j], 0.1, 0.6, 0.2, 0.8) for j=1:size(X,2)]...)
+        @test th_jac2jac(X, 0.1, 0.6, 0.2, 0.8, 2) ≈ vcat([permutedims(jac2jac(X[k,:], 0.1, 0.6, 0.2, 0.8)) for k=1:size(X,1)]...)
+
+        @test th_jac2jac(X, 0.1, 0.6, 0.1, 0.8) == plan_th_jac2jac!(X, 0.1, 0.6, 0.1, 0.8, 1:2)*copy(X)
+        @test th_jac2jac(X, 0.1, 0.6, 0.1, 0.8) == plan_th_jac2jac!(X, 0.1, 0.6, 0.1, 0.8, 1:2)*copy(X)
+
+        @test th_jac2jac(th_jac2jac(X, 0.1, 0.6, 0.1, 0.8), 0.1, 0.8, 0.1, 0.6) ≈ X
+
+        @test th_jac2jac(X, 0.1, 0.6, 3.1, 2.8, 1) ≈ hcat([jac2jac(X[:,j], 0.1, 0.6, 3.1, 2.8) for j=1:size(X,2)]...)
+        @test th_jac2jac(X, 0.1, 0.6, 3.1, 2.8, 2) ≈ vcat([permutedims(jac2jac(X[k,:], 0.1, 0.6, 3.1, 2.8)) for k=1:size(X,1)]...)
+        @test th_jac2jac(X, 0.1, 0.6, 3.1, 2.8) ≈ th_jac2jac(th_jac2jac(X, 0.1, 0.6, 3.1, 2.8, 1), 0.1, 0.6, 3.1, 2.8, 2)
     end
 
     @testset "BigFloat" begin

From 2dcabebd3b31a59b3ece09e390e127a279a941c8 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Sat, 21 Oct 2023 21:41:30 +0100
Subject: [PATCH 151/222] Add th_cheb2jac and th_jac2cheb (#229)

* Add Cheb2Jac and Jac2Cheb

* seed random nums

* Update toeplitzhankeltests.jl
---
 Project.toml                |  9 ++++-
 src/toeplitzhankel.jl       | 72 +++++++++++++++++++++++++++++++++++--
 test/toeplitzhankeltests.jl | 24 +++++++++++--
 3 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/Project.toml b/Project.toml
index 8a132703..3a97bbd3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -13,7 +13,6 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
@@ -27,3 +26,11 @@ Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
 ToeplitzMatrices = "0.7.1, 0.8"
 julia = "1.7"
+
+
+[extras]
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test", "Random"]
diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index eb11994e..3bd09d55 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -448,7 +448,7 @@ Jac2JacPlanTH(plans, α, β, γ, δ, dims) = Jac2JacPlanTH(plans, promote(α, β
 
 function *(P::Jac2JacPlanTH, A::AbstractArray)
     if P.α + P.β ≤ -1
-        _jacobi_raise_a!(A, P.α, P.β)
+        _jacobi_raise_a!(A, P.α, P.β, P.dims)
         c,d = _nearest_jacobi_par(P.α+1, P.γ), _nearest_jacobi_par(P.β, P.δ)
     else
         c,d = _nearest_jacobi_par(P.α, P.γ), _nearest_jacobi_par(P.β, P.δ)
@@ -690,4 +690,72 @@ plan_th_jac2jac!(::Type{S}, mn::NTuple{N,Int}, α, β, γ, δ, dims::UnitRange)
 plan_th_jac2jac!(::Type{S}, mn::Tuple{Int}, α, β, γ, δ, dims::Tuple{Int}=(1,)) where {S} = plan_th_jac2jac!(S, mn, α, β, γ, δ, dims...)
 plan_th_jac2jac!(::Type{S}, (m,n)::NTuple{2,Int}, α, β, γ, δ) where {S} = plan_th_jac2jac!(S, (m,n), α, β, γ, δ, (1,2))
 plan_th_jac2jac!(arr::AbstractArray{T}, α, β, γ, δ, dims...) where T = plan_th_jac2jac!(T, size(arr), α, β, γ, δ, dims...)
-th_jac2jac(v, α, β, γ, δ, dims...) = plan_th_jac2jac!(eltype(v), size(v), α, β, γ, δ, dims...)*copy(v)
\ No newline at end of file
+th_jac2jac(v, α, β, γ, δ, dims...) = plan_th_jac2jac!(eltype(v), size(v), α, β, γ, δ, dims...)*copy(v)
+
+
+####
+# cheb2jac
+####
+
+struct Cheb2JacPlanTH{T, Pl<:Jac2JacPlanTH{T}} <: Plan{T}
+    jac2jac::Pl
+end
+
+
+struct Jac2ChebPlanTH{T, Pl<:Jac2JacPlanTH{T}} <: Plan{T}
+    jac2jac::Pl
+end
+
+
+function jac_cheb_recurrencecoefficients(T, N)
+    n = 0:N
+    h = one(T)/2
+    A = (2n .+ one(T)) ./ (n .+ one(T))
+    A[1] /= 2
+    A, Zeros(n), 
+    ((n .- h) .* (n .- h) .* (2n .+ one(T))) ./ ((n .+ one(T)) .* n .* (2n .- one(T)))
+end
+
+
+function *(P::Cheb2JacPlanTH{T}, X::AbstractArray) where T
+    A,B,C = jac_cheb_recurrencecoefficients(T, max(size(X)...))
+
+    for d in P.jac2jac.dims
+        if d == 1
+            p = forwardrecurrence(size(X,1), A,B,C, one(T))
+            X .= p .\ X
+        else
+            @assert d == 2
+            n = size(X,2)
+            p = forwardrecurrence(size(X,2), A,B,C, one(T))
+            X .= X ./ transpose(p)
+        end
+    end
+    P.jac2jac*X
+end
+
+function *(P::Jac2ChebPlanTH{T}, X::AbstractArray) where T
+    X = P.jac2jac*X
+    A,B,C = jac_cheb_recurrencecoefficients(T, max(size(X)...))
+
+    for d in P.jac2jac.dims
+        if d == 1
+            p = forwardrecurrence(size(X,1), A,B,C, one(T))
+            X .= p .* X
+        else
+            @assert d == 2
+            n = size(X,2)
+            p = forwardrecurrence(size(X,2), A,B,C, one(T))
+            X .= X .* transpose(p)
+        end
+    end
+    X
+end
+
+plan_th_cheb2jac!(::Type{T}, mn, α, β, dims...) where T = Cheb2JacPlanTH(plan_th_jac2jac!(T, mn, -one(α)/2, -one(α)/2, α, β, dims...))
+plan_th_cheb2jac!(arr::AbstractArray{T}, α, β, dims...) where T = plan_th_cheb2jac!(T, size(arr), α, β, dims...)
+th_cheb2jac(v, α, β, dims...) = plan_th_cheb2jac!(eltype(v), size(v), α, β, dims...)*copy(v)
+
+plan_th_jac2cheb!(::Type{T}, mn, α, β, dims...) where T = Jac2ChebPlanTH(plan_th_jac2jac!(T, mn, α, β, -one(α)/2, -one(α)/2, dims...))
+plan_th_jac2cheb!(arr::AbstractArray{T}, α, β, dims...) where T = plan_th_jac2cheb!(T, size(arr), α, β, dims...)
+th_jac2cheb(v, α, β, dims...) = plan_th_jac2cheb!(eltype(v), size(v), α, β, dims...)*copy(v)
\ No newline at end of file
diff --git a/test/toeplitzhankeltests.jl b/test/toeplitzhankeltests.jl
index ce1d2f69..b72232ed 100644
--- a/test/toeplitzhankeltests.jl
+++ b/test/toeplitzhankeltests.jl
@@ -1,7 +1,10 @@
-using FastTransforms, Test
+using FastTransforms, Test, Random
 import FastTransforms: th_leg2cheb, th_cheb2leg, th_leg2chebu, th_ultra2ultra,th_jac2jac, th_leg2chebu,
                         lib_leg2cheb, lib_cheb2leg, lib_ultra2ultra, lib_jac2jac,
-                        plan_th_cheb2leg!, plan_th_leg2chebu!, plan_th_leg2cheb!, plan_th_ultra2ultra!, plan_th_jac2jac!
+                        plan_th_cheb2leg!, plan_th_leg2chebu!, plan_th_leg2cheb!, plan_th_ultra2ultra!, plan_th_jac2jac!,
+                        th_cheb2jac, th_jac2cheb
+
+Random.seed!(0)
 
 @testset "ToeplitzHankel" begin
     for x in ([1.0], [1.0,2,3,4,5], [1.0+im,2-3im,3+4im,4-5im,5+10im], collect(1.0:1000))
@@ -26,11 +29,14 @@ import FastTransforms: th_leg2cheb, th_cheb2leg, th_leg2chebu, th_ultra2ultra,th
         @test th_jac2jac(x, -1/2,-1/2,1/2,0) ≈ lib_jac2jac(x, -1/2,-1/2,1/2,0)
         @test th_jac2jac(x, -1/2,-1/2,0,1/2) ≈ lib_jac2jac(x, -1/2,-1/2,0,1/2)
         @test th_jac2jac(x, -3/4,-3/4,0,3/4) ≈ lib_jac2jac(x, -3/4,-3/4,0,3/4)
-        @test th_jac2jac(x,0, 0, 5, 5) ≈ lib_jac2jac(x, 0, 0, 5, 5)
         if length(x) < 10
+            @test th_jac2jac(x,0, 0, 5, 5) ≈ lib_jac2jac(x, 0, 0, 5, 5)
             @test th_jac2jac(x, 5, 5, 0, 0) ≈ lib_jac2jac(x,  5, 5, 0, 0)
         end
 
+        @test th_cheb2jac(x, 0.2, 0.3) ≈ cheb2jac(x, 0.2, 0.3)
+        @test th_jac2cheb(x, 0.2, 0.3) ≈ jac2cheb(x, 0.2, 0.3)
+
         @test th_cheb2leg(th_leg2cheb(x)) ≈ x
         @test th_leg2cheb(th_cheb2leg(x)) ≈ x
         @test th_ultra2ultra(th_ultra2ultra(x, 0.1, 0.6), 0.6, 0.1) ≈ x
@@ -93,6 +99,18 @@ import FastTransforms: th_leg2cheb, th_cheb2leg, th_leg2chebu, th_ultra2ultra,th
         @test th_jac2jac(X, 0.1, 0.6, 3.1, 2.8, 1) ≈ hcat([jac2jac(X[:,j], 0.1, 0.6, 3.1, 2.8) for j=1:size(X,2)]...)
         @test th_jac2jac(X, 0.1, 0.6, 3.1, 2.8, 2) ≈ vcat([permutedims(jac2jac(X[k,:], 0.1, 0.6, 3.1, 2.8)) for k=1:size(X,1)]...)
         @test th_jac2jac(X, 0.1, 0.6, 3.1, 2.8) ≈ th_jac2jac(th_jac2jac(X, 0.1, 0.6, 3.1, 2.8, 1), 0.1, 0.6, 3.1, 2.8, 2)
+
+        @test th_jac2jac(X, -0.5, -0.5, 3.1, 2.8, 1) ≈ hcat([jac2jac(X[:,j], -0.5, -0.5, 3.1, 2.8) for j=1:size(X,2)]...)
+        @test th_jac2jac(X, -0.5, -0.5, 3.1, 2.8, 2) ≈ vcat([permutedims(jac2jac(X[k,:], -0.5, -0.5, 3.1, 2.8)) for k=1:size(X,1)]...)
+        @test th_jac2jac(X, -0.5, -0.5, 3.1, 2.8) ≈ th_jac2jac(th_jac2jac(X, -0.5, -0.5, 3.1, 2.8, 1), -0.5, -0.5, 3.1, 2.8, 2)
+
+        @test th_cheb2jac(X, 3.1, 2.8, 1) ≈ hcat([cheb2jac(X[:,j], 3.1, 2.8) for j=1:size(X,2)]...)
+        @test th_cheb2jac(X, 3.1, 2.8, 2) ≈ vcat([permutedims(cheb2jac(X[k,:], 3.1, 2.8)) for k=1:size(X,1)]...)
+        @test th_cheb2jac(X, 3.1, 2.8) ≈ th_cheb2jac(th_cheb2jac(X, 3.1, 2.8, 1), 3.1, 2.8, 2)
+
+        @test th_jac2cheb(X, 3.1, 2.8, 1) ≈ hcat([jac2cheb(X[:,j], 3.1, 2.8) for j=1:size(X,2)]...)
+        @test th_jac2cheb(X, 3.1, 2.8, 2) ≈ vcat([permutedims(jac2cheb(X[k,:], 3.1, 2.8)) for k=1:size(X,1)]...)
+        @test th_jac2cheb(X, 3.1, 2.8) ≈ th_jac2cheb(th_jac2cheb(X, 3.1, 2.8, 1), 3.1, 2.8, 2)
     end
 
     @testset "BigFloat" begin

From 649f25eefbd330390a52b93be4ae61bfcd0156dd Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Sun, 22 Oct 2023 09:25:09 +0100
Subject: [PATCH 152/222] Fix bugs hit by th_cheb2jac(x, 1,1) (#230)

* Fix bugs hit by th_cheb2jac(x, 1,1)

* Update toeplitzhankel.jl
---
 Project.toml                |  2 +-
 src/toeplitzhankel.jl       | 25 +++++++++++++++++++------
 test/toeplitzhankeltests.jl |  2 ++
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3a97bbd3..96fdc0a6 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.9"
+version = "0.15.10"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index 3bd09d55..4e0ab3db 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -283,8 +283,17 @@ end
 isapproxinteger(::Integer) = true
 isapproxinteger(x) = isinteger(x) || x ≈ round(Int,x)  || x+1 ≈ round(Int,x+1)
 
+"""
+  _nearest_jacobi_par(α, γ)
 
-_nearest_jacobi_par(α, γ) = isapproxinteger(α-γ) ? α : round(Int,α,RoundDown) + mod(γ,1)
+Return a number that differs from γ by an integer and is less than 1 away from α (the result is then shifted up by 1 if it is ≤ -1).
+"""
+function _nearest_jacobi_par(α::T, γ::T) where T
+    ret = isapproxinteger(α-γ) ? α : round(Int,α,RoundDown) + mod(γ,1)
+    ret ≤ -1 ? ret + 1 : ret
+end
+_nearest_jacobi_par(α::T, ::T) where T<:Integer = α
+_nearest_jacobi_par(α, γ) = _nearest_jacobi_par(promote(α,γ)...)
 
 
 struct Ultra2UltraPlanTH{T, Plans, Dims} <: Plan{T}
@@ -296,12 +305,16 @@ end
 
 function *(P::Ultra2UltraPlanTH, A::AbstractArray)
     ret = A
-    for p in P.plans
-        ret = p*ret
-    end
-    c = _nearest_jacobi_par(P.λ₁, P.λ₂)
+    if isapproxinteger(P.λ₂ - P.λ₁)
+        _ultra2ultra_integerinc!(ret, P.λ₁, P.λ₂, P.dims)
+    else
+        for p in P.plans
+            ret = p*ret
+        end
+        c = _nearest_jacobi_par(P.λ₁, P.λ₂)
 
-    _ultra2ultra_integerinc!(ret, c, P.λ₂, P.dims)
+        _ultra2ultra_integerinc!(ret, c, P.λ₂, P.dims)
+    end
 end
 
 function _ultra2ultraTH_TLC(::Type{S}, mn, λ₁, λ₂, d) where {S}
diff --git a/test/toeplitzhankeltests.jl b/test/toeplitzhankeltests.jl
index b72232ed..19274747 100644
--- a/test/toeplitzhankeltests.jl
+++ b/test/toeplitzhankeltests.jl
@@ -36,6 +36,8 @@ Random.seed!(0)
 
         @test th_cheb2jac(x, 0.2, 0.3) ≈ cheb2jac(x, 0.2, 0.3)
         @test th_jac2cheb(x, 0.2, 0.3) ≈ jac2cheb(x, 0.2, 0.3)
+        @test th_cheb2jac(x, 1, 1) ≈ cheb2jac(x, 1, 1)
+        @test th_jac2cheb(x, 1, 1) ≈ jac2cheb(x, 1, 1)
 
         @test th_cheb2leg(th_leg2cheb(x)) ≈ x
         @test th_leg2cheb(th_cheb2leg(x)) ≈ x

From d9b615df1be09c73dc8d52e73aed73574da61369 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Sun, 22 Oct 2023 19:23:14 +0100
Subject: [PATCH 153/222] Matrix Chebyshev U Transforms (#231)

* Matrix Chebyshev U Transforms

* Cheb U 2 changes

* fix inferrability

* tests pass

* matrix ichebyshevu

* tests pass
---
 Project.toml              |   2 +-
 src/chebyshevtransform.jl | 245 ++++++++++++++++++++++++++------------
 test/chebyshevtests.jl    |  28 ++++-
 3 files changed, 195 insertions(+), 80 deletions(-)

diff --git a/Project.toml b/Project.toml
index 96fdc0a6..3de40b67 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.10"
+version = "0.15.11"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 11e07010..28d77eb7 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -19,9 +19,12 @@ ChebyshevTransformPlan{T,kind}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T
     ChebyshevTransformPlan{T,kind,K,inplace,N,R}(plan)
 
 # jump through some hoops to make inferrable
+
+_fftKtype(::Val{N}, _...) where N = Vector{Int32}
+
 function plan_chebyshevtransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevTransformPlan{T,1,_fftKtype(Val{N}(), dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevTransformPlan{T,1}(FFTW.plan_r2r!(x, FIRSTKIND, dims...; kws...))
     end
@@ -34,7 +37,7 @@ end
 
 function plan_chebyshevtransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevTransformPlan{T,1,_fftKtype(Val{N}(), dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevTransformPlan{T,1}(FFTW.plan_r2r(x, FIRSTKIND, dims...; kws...))
     end
@@ -149,17 +152,15 @@ end
 end
 
 function *(P::ChebyshevTransformPlan{T,1,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
-    n = length(x)
-    n == 0 && return x
+    isempty(x) && return x
 
     y = P.plan*x # will be  === x if in-place
     _cheb1_rescale!(P.plan.region, y)
 end
 
 function mul!(y::AbstractArray{T,N}, P::ChebyshevTransformPlan{T,1,K,false,N}, x::AbstractArray{<:Any,N}) where {T,K,N}
-    n = length(x)
-    length(y) == n || throw(DimensionMismatch("output must match dimension"))
-    n == 0 && return y
+    size(y) == size(x) || throw(DimensionMismatch("output must match dimension"))
+    isempty(x) && return y
     _plan_mul!(y, P.plan, x)
     _cheb1_rescale!(P.plan.region, y)
 end
@@ -248,7 +249,7 @@ inv(P::IChebyshevTransformPlan{T,1}) where {T} = ChebyshevTransformPlan{T,1}(inv
 
 function plan_ichebyshevtransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevTransformPlan{T,1,_fftKtype(Val{N}(), dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevTransformPlan{T,1}(FFTW.plan_r2r!(x, IFIRSTKIND, dims...; kws...))
     end
@@ -260,7 +261,7 @@ end
 
 function plan_ichebyshevtransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevTransformPlan{T,1,_fftKtype(Val{N}(), dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevTransformPlan{T,1}(FFTW.plan_r2r(x, IFIRSTKIND, dims...; kws...))
     end
@@ -308,7 +309,7 @@ function mul!(y::AbstractArray{T,N}, P::IChebyshevTransformPlan{T,1,K,false,N},
     size(y) == size(x) || throw(DimensionMismatch("output must match dimension"))
     isempty(x) && return y
 
-    _icheb1_prescale!(P.plan.region, x) # Todo: don't mutate x
+    _icheb1_prescale!(P.plan.region, x) # TODO: don't mutate x
     _plan_mul!(y, P.plan, x)
     _icheb1_postscale!(P.plan.region, x)
     ldiv!(2^length(P.plan.region), y)
@@ -371,7 +372,9 @@ ichebyshevtransform!(x::AbstractArray, dims...; kwds...) = plan_ichebyshevtransf
 ichebyshevtransform(x, dims...; kwds...) = plan_ichebyshevtransform(x, dims...; kwds...)*x
 
 
-## Chebyshev U
+#######
+# Chebyshev U
+#######
 
 const UFIRSTKIND = FFTW.RODFT10
 const USECONDKIND = FFTW.RODFT00
@@ -388,7 +391,7 @@ ChebyshevUTransformPlan{T,kind}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {
 
 function plan_chebyshevutransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevUTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevUTransformPlan{T,1,_fftKtype(Val{N}(), dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevUTransformPlan{T,1}(FFTW.plan_r2r!(x, UFIRSTKIND, dims...; kws...))
     end
@@ -400,13 +403,19 @@ end
 
 function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevUTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevUTransformPlan{T,1,_fftKtype(Val{N}(), dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevUTransformPlan{T,1}(FFTW.plan_r2r(x, UFIRSTKIND, dims...; kws...))
     end
 end
 function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
-    any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
+    if isempty(dims)
+        any(≤(1), size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
+    else
+        for d in dims[1]
+            size(x,d) ≤ 1 && throw(ArgumentError("Array must contain at least 2 entries"))
+        end
+    end
     ChebyshevUTransformPlan{T,2}(FFTW.plan_r2r(x, USECONDKIND, dims...; kws...))
 end
 
@@ -414,75 +423,141 @@ plan_chebyshevutransform!(x::AbstractArray, dims...; kws...) = plan_chebyshevutr
 plan_chebyshevutransform(x::AbstractArray, dims...; kws...) = plan_chebyshevutransform(x, Val(1), dims...; kws...)
 
 
-@inline function _chebu1_prescale!(_, x::AbstractVector{T}) where T
-    n = length(x)
-    for k=1:n # sqrt(1-x_j^2) weight
-        x[k] *= sinpi(one(T)/(2n) + (k-one(T))/n)/n
+@inline function _chebu1_prescale!(d::Number, x::AbstractVecOrMat{T}) where T
+    m,n = size(x,1),size(x,2)
+    if d == 1
+        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
+            x[k,j] *= sinpi(one(T)/(2m) + (k-one(T))/m)/m
+        end
+    else
+        @assert d == 2
+        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
+            x[k,j] *= sinpi(one(T)/(2n) + (j-one(T))/n)/n
+        end
     end
     x
 end
 
-@inline function _chebu1_postscale!(_, x::AbstractVector{T}) where T
-    n = length(x)
-    for k=1:n # sqrt(1-x_j^2) weight
-        x[k] /= sinpi(one(T)/(2n) + (k-one(T))/n)/n
+@inline function _chebu1_prescale!(d, y::AbstractArray)
+    for k in d
+        _chebu1_prescale!(k, y)
+    end
+    y
+end
+
+@inline function _chebu1_postscale!(d::Number, x::AbstractVecOrMat{T}) where T
+    m,n = size(x,1),size(x,2)
+    if d == 1
+        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
+            x[k,j] /= sinpi(one(T)/(2m) + (k-one(T))/m)/m
+        end
+    else
+        @assert d == 2
+        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
+            x[k,j] /= sinpi(one(T)/(2n) + (j-one(T))/n)/n
+        end
     end
     x
 end
 
-function *(P::ChebyshevUTransformPlan{T,1,K,true}, x::AbstractVector{T}) where {T,K}
+@inline function _chebu1_postscale!(d, y::AbstractArray)
+    for k in d
+        _chebu1_postscale!(k, y)
+    end
+    y
+end
+
+function *(P::ChebyshevUTransformPlan{T,1,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
     length(x) ≤ 1 && return x
     _chebu1_prescale!(P.plan.region, x)
     P.plan * x
 end
 
-function mul!(y::AbstractVector{T}, P::ChebyshevUTransformPlan{T,1,K,false}, x::AbstractVector{T}) where {T,K}
-    n = length(x)
-    length(x) ≤ 1 && return copyto!(y, x)
-    _chebu1_prescale!(P.plan.region, x)
+function mul!(y::AbstractArray{T}, P::ChebyshevUTransformPlan{T,1,K,false}, x::AbstractArray{T}) where {T,K}
+    size(y) == size(x) || throw(DimensionMismatch("output must match dimension"))
+    isempty(x) && return y
+    _chebu1_prescale!(P.plan.region, x) # Todo don't mutate x
     _plan_mul!(y, P.plan, x)
     _chebu1_postscale!(P.plan.region, x)
+    for d in P.plan.region
+        size(y,d) == 1 && ldiv!(2, y) # fix doubling
+    end
     y
 end
 
-@inline function _chebu2_prescale!(_, x::AbstractVector{T}) where T
-    n = length(x)
-    c = one(T)/ (n+1)
-    for k=1:n # sqrt(1-x_j^2) weight
-        x[k] *= sinpi(k*c)
+@inline function _chebu2_prescale!(d::Number, x::AbstractVecOrMat{T}) where T
+    m,n = size(x,1),size(x,2)
+    if d == 1
+        c = one(T)/ (m+1)
+        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
+            x[k,j] *= sinpi(k*c)
+        end
+    else
+        @assert d == 2
+        c = one(T)/ (n+1)
+        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
+            x[k,j] *= sinpi(j*c)
+        end
     end
     x
 end
 
-@inline function _chebu2_postscale!(_, x::AbstractVector{T}) where T
-    n = length(x)
-    c = one(T)/ (n+1)
-    @inbounds for k=1:n # sqrt(1-x_j^2) weight
-        x[k] /= sinpi(k*c)
+@inline function _chebu2_prescale!(d, y::AbstractArray)
+    for k in d
+        _chebu2_prescale!(k, y)
+    end
+    y
+end
+
+
+@inline function _chebu2_postscale!(d::Number, x::AbstractVecOrMat{T}) where T
+    m,n = size(x,1),size(x,2)
+    if d == 1
+        c = one(T)/ (m+1)
+        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
+            x[k,j] /= sinpi(k*c)
+        end
+    else
+        @assert d == 2
+        c = one(T)/ (n+1)
+        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
+            x[k,j] /= sinpi(j*c)
+        end
     end
     x
 end
 
-function *(P::ChebyshevUTransformPlan{T,2,K,true}, x::AbstractVector{T}) where {T,K}
-    n = length(x)
-    n ≤ 1 && return x
-    _chebu2_prescale!(P.plan.region, x)
-    lmul!(one(T)/ (n+1), P.plan * x)
+@inline function _chebu2_postscale!(d, y::AbstractArray)
+    for k in d
+        _chebu2_postscale!(k, y)
+    end
+    y
 end
 
-function mul!(y::AbstractVector{T}, P::ChebyshevUTransformPlan{T,2,K,false}, x::AbstractVector{T}) where {T,K}
-    n = length(x)
-    n ≤ 1 && return copyto!(y, x)
+function *(P::ChebyshevUTransformPlan{T,2,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
+    sc = one(T)
+    for d in P.plan.region
+        sc *= one(T)/(size(x,d)+1)
+    end
     _chebu2_prescale!(P.plan.region, x)
+    lmul!(sc, P.plan * x)
+end
+
+function mul!(y::AbstractArray{T}, P::ChebyshevUTransformPlan{T,2,K,false}, x::AbstractArray{T}) where {T,K}
+    sc = one(T)
+    for d in P.plan.region
+        sc *= one(T)/(size(x,d)+1)
+    end
+    _chebu2_prescale!(P.plan.region, x) # TODO don't mutate x
     _plan_mul!(y, P.plan, x)
     _chebu2_postscale!(P.plan.region, x)
-    lmul!(one(T)/ (n+1), y)
+    lmul!(sc, y)
 end
 
 *(P::ChebyshevUTransformPlan{T,kind,K,false,N}, x::AbstractArray{T,N}) where {T,kind,K,N} =
     mul!(similar(x), P, x)
 
-chebyshevutransform!(x::AbstractVector{T}, dims...; kws...) where {T<:fftwNumber} =
+chebyshevutransform!(x::AbstractArray{T}, dims...; kws...) where {T<:fftwNumber} =
     plan_chebyshevutransform!(x, dims...; kws...)*x
 
 
@@ -509,19 +584,19 @@ IChebyshevUTransformPlan{T,kind}(F::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,
 
 function plan_ichebyshevutransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevUTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevUTransformPlan{T,1,_fftKtype(Val{N}(), dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevUTransformPlan{T,1}(FFTW.plan_r2r!(x, IUFIRSTKIND, dims...; kws...))
     end
 end
 function plan_ichebyshevutransform!(x::AbstractArray{T,N}, ::Val{2}, dims...; kws...) where {T<:fftwNumber,N}
     any(≤(1),size(x)) && throw(ArgumentError("Array must contain at least 2 entries"))
-    IChebyshevUTransformPlan{T,2}(FFTW.plan_r2r!(x, USECONDKIND))
+    IChebyshevUTransformPlan{T,2}(FFTW.plan_r2r!(x, USECONDKIND, dims...))
 end
 
 function plan_ichebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevUTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevUTransformPlan{T,1,_fftKtype(Val{N}(), dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevUTransformPlan{T,1}(FFTW.plan_r2r(x, IUFIRSTKIND, dims...; kws...))
     end
@@ -543,42 +618,59 @@ inv(P::IChebyshevUTransformPlan{T,2}) where {T} = ChebyshevUTransformPlan{T,2}(P
 inv(P::ChebyshevUTransformPlan{T,1}) where {T} = IChebyshevUTransformPlan{T,1}(inv(P.plan).p)
 inv(P::IChebyshevUTransformPlan{T,1}) where {T} = ChebyshevUTransformPlan{T,1}(inv(P.plan).p)
 
-
-function _ichebyu1_postscale!(_, x::AbstractVector{T}) where T
-    n = length(x)
-    @inbounds for k=1:n # sqrt(1-x_j^2) weight
-        x[k] /= 2sinpi(one(T)/(2n) + (k-one(T))/n)
+@inline function _ichebu1_postscale!(d::Number, x::AbstractVecOrMat{T}) where T
+    m,n = size(x,1),size(x,2)
+    if d == 1
+        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
+            x[k,j] /= 2sinpi(one(T)/(2m) + (k-one(T))/m)
+        end
+    else
+        @assert d == 2
+        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
+            x[k,j] /= 2sinpi(one(T)/(2n) + (j-one(T))/n)
+        end
     end
     x
 end
-function *(P::IChebyshevUTransformPlan{T,1,K,true}, x::AbstractVector{T}) where {T<:fftwNumber,K}
-    n = length(x)
-    n ≤ 1 && return x
 
-    x = P.plan * x
-    _ichebyu1_postscale!(P.plan.region, x)
+@inline function _ichebu1_postscale!(d, y::AbstractArray)
+    for k in d
+        _ichebu1_postscale!(k, y)
+    end
+    y
 end
 
-function mul!(y::AbstractVector{T}, P::IChebyshevUTransformPlan{T,1,K,false}, x::AbstractVector{T}) where {T<:fftwNumber,K}
-    n = length(x)
-    length(y) == n || throw(DimensionMismatch("output must match dimension"))
-    n ≤ 1 && return x
+function *(P::IChebyshevUTransformPlan{T,1,K,true}, x::AbstractArray{T}) where {T<:fftwNumber,K}
+    length(x) ≤ 1 && return x
+    x = P.plan * x
+    _ichebu1_postscale!(P.plan.region, x)
+end
 
+function mul!(y::AbstractArray{T}, P::IChebyshevUTransformPlan{T,1,K,false}, x::AbstractArray{T}) where {T<:fftwNumber,K}
+    size(y) == size(x) || throw(DimensionMismatch("output must match dimension"))
+    isempty(x) && return y
     _plan_mul!(y, P.plan, x)
-    _ichebyu1_postscale!(P.plan.region, y)
+    _ichebu1_postscale!(P.plan.region, y)
+    for d in P.plan.region
+        size(y,d) == 1 && lmul!(2, y) # fix doubling
+    end
+    y
 end
 
-function _ichebu2_rescale!(_, x::AbstractVector{T}) where T
-    n = length(x)
-    c = one(T)/ (n+1)
-    for k=1:n # sqrt(1-x_j^2) weight
-        x[k] /= sinpi(k*c)
-    end
+function _ichebu2_rescale!(d::Number, x::AbstractArray{T}) where T
+    _chebu2_postscale!(d, x)
     ldiv!(2, x)
     x
 end
 
-function *(P::IChebyshevUTransformPlan{T,2,K,true}, x::AbstractVector{T}) where {T<:fftwNumber,K}
+@inline function _ichebu2_rescale!(d, y::AbstractArray)
+    for k in d
+        _ichebu2_rescale!(k, y)
+    end
+    y
+end
+
+function *(P::IChebyshevUTransformPlan{T,2,K,true}, x::AbstractArray{T}) where {T<:fftwNumber,K}
     n = length(x)
     n ≤ 1 && return x
 
@@ -586,16 +678,15 @@ function *(P::IChebyshevUTransformPlan{T,2,K,true}, x::AbstractVector{T}) where
     _ichebu2_rescale!(P.plan.region, x)
 end
 
-function mul!(y::AbstractVector{T}, P::IChebyshevUTransformPlan{T,2,K,false}, x::AbstractVector{T}) where {T<:fftwNumber,K}
-    n = length(x)
-    length(y) == n || throw(DimensionMismatch("output must match dimension"))
-    n ≤ 1 && return x
+function mul!(y::AbstractArray{T}, P::IChebyshevUTransformPlan{T,2,K,false}, x::AbstractArray{T}) where {T<:fftwNumber,K}
+    size(y) == size(x) || throw(DimensionMismatch("output must match dimension"))
+    length(x) ≤ 1 && return x
 
     _plan_mul!(y, P.plan, x)
     _ichebu2_rescale!(P.plan.region, y)
 end
 
-ichebyshevutransform!(x::AbstractVector{T}, dims...; kwds...) where {T<:fftwNumber} =
+ichebyshevutransform!(x::AbstractArray{T}, dims...; kwds...) where {T<:fftwNumber} =
     plan_ichebyshevutransform!(x, dims...; kwds...)*x
 
 ichebyshevutransform(x, dims...; kwds...) = plan_ichebyshevutransform(x, dims...; kwds...)*x
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 696c13f0..82b8da5c 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -154,6 +154,7 @@ using FastTransforms, Test
             p_1 = chebyshevpoints(T, n)
             f = exp.(p_1)
             g = @inferred(chebyshevutransform(f))
+            @test f ≈ exp.(p_1)
 
             f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-1]' * g
             @test f̃(0.1) ≈ exp(T(0.1))
@@ -221,7 +222,7 @@ using FastTransforms, Test
 
             f̃ = x -> [sin((k+1)*acos(x))/sin(acos(x)) for k=0:n-3]' * g
             @test f̃(0.1) ≈ exp(T(0.1))
-            @test @inferred(ichebyshevutransform(g, Val(2))) ≈ exp.(p_2)
+            @test @inferred(ichebyshevutransform(g, Val(2))) ≈ f ≈ exp.(p_2)
 
             fcopy = copy(f)
             gcopy = copy(g)
@@ -291,6 +292,29 @@ using FastTransforms, Test
             @test chebyshevtransform(ichebyshevtransform(X)) ≈ X
         end
 
+        @testset "chebyshevutransform" begin
+            @test @inferred(chebyshevutransform(X,1)) ≈ @inferred(chebyshevutransform!(copy(X),1)) ≈ hcat(chebyshevutransform.([X[:,k] for k=axes(X,2)])...)
+            @test chebyshevutransform(X,2) ≈ chebyshevutransform!(copy(X),2) ≈ hcat(chebyshevutransform.([X[k,:] for k=axes(X,1)])...)'
+            @test @inferred(chebyshevutransform(X,Val(2),1)) ≈ @inferred(chebyshevutransform!(copy(X),Val(2),1)) ≈ hcat(chebyshevutransform.([X[:,k] for k=axes(X,2)],Val(2))...)
+            @test chebyshevutransform(X,Val(2),2) ≈ chebyshevutransform!(copy(X),Val(2),2) ≈ hcat(chebyshevutransform.([X[k,:] for k=axes(X,1)],Val(2))...)'
+
+            @test @inferred(chebyshevutransform(X)) ≈ @inferred(chebyshevutransform!(copy(X))) ≈ chebyshevutransform(chebyshevutransform(X,1),2)
+            @test @inferred(chebyshevutransform(X,Val(2))) ≈ @inferred(chebyshevutransform!(copy(X),Val(2))) ≈ chebyshevutransform(chebyshevutransform(X,Val(2),1),Val(2),2)
+        end
+
+        @testset "ichebyshevutransform" begin
+            @test @inferred(ichebyshevutransform(X,1)) ≈ @inferred(ichebyshevutransform!(copy(X),1)) ≈ hcat(ichebyshevutransform.([X[:,k] for k=axes(X,2)])...)
+            @test ichebyshevutransform(X,2) ≈ ichebyshevutransform!(copy(X),2) ≈ hcat(ichebyshevutransform.([X[k,:] for k=axes(X,1)])...)'
+            @test @inferred(ichebyshevutransform(X,Val(2),1)) ≈ @inferred(ichebyshevutransform!(copy(X),Val(2),1)) ≈ hcat(ichebyshevutransform.([X[:,k] for k=axes(X,2)],Val(2))...)
+            @test ichebyshevutransform(X,Val(2),2) ≈ ichebyshevutransform!(copy(X),Val(2),2) ≈ hcat(ichebyshevutransform.([X[k,:] for k=axes(X,1)],Val(2))...)'
+
+            @test @inferred(ichebyshevutransform(X)) ≈ @inferred(ichebyshevutransform!(copy(X))) ≈ ichebyshevutransform(ichebyshevutransform(X,1),2)
+            @test @inferred(ichebyshevutransform(X,Val(2))) ≈ @inferred(ichebyshevutransform!(copy(X),Val(2))) ≈ ichebyshevutransform(ichebyshevutransform(X,Val(2),1),Val(2),2)
+
+            @test ichebyshevutransform(chebyshevutransform(X)) ≈ X
+            @test chebyshevutransform(ichebyshevutransform(X)) ≈ X
+        end
+
         X = randn(1,1)
         @test chebyshevtransform!(copy(X), Val(1)) == ichebyshevtransform!(copy(X), Val(1)) == X
         @test_throws ArgumentError chebyshevtransform!(copy(X), Val(2))
@@ -394,7 +418,7 @@ using FastTransforms, Test
             plan_chebyshevutransform(X,Val(1),2), plan_chebyshevutransform(X, Val(2),2),
             plan_ichebyshevutransform(X,Val(1),1), plan_ichebyshevutransform(X, Val(2),1),
             plan_ichebyshevutransform(X,Val(1),2), plan_ichebyshevutransform(X, Val(2),2))
-            @test_broken F \ (F*X) ≈ F * (F\X) ≈ X
+            @test F \ (F*X) ≈ F * (F\X) ≈ X
         end
     end
 end

From db873136f7b2eaf5f8ee33701359ad49dc7443d5 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Tue, 7 Nov 2023 16:21:31 +0530
Subject: [PATCH 154/222] Docstrings for orthogonal polynomial transforms
 (#199)

* Docstrings for orthogonal polynomial transforms

* Update leg2cheb

Co-authored-by: Sheehan Olver <solver@mac.com>

* Update cheb2leg

* StridedVector -> AbstractVector

---------

Co-authored-by: Sheehan Olver <solver@mac.com>
---
 src/FastTransforms.jl |   1 +
 src/docstrings.jl     | 121 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 src/docstrings.jl

diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index c8017b27..f9d99e74 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -134,5 +134,6 @@ end
 #     end
 # end
 
+include("docstrings.jl")
 
 end # module
diff --git a/src/docstrings.jl b/src/docstrings.jl
new file mode 100644
index 00000000..c3ecd7a3
--- /dev/null
+++ b/src/docstrings.jl
@@ -0,0 +1,121 @@
+"""
+	leg2cheb(v::AbstractVector; normleg::Bool=false, normcheb::Bool=false)
+
+Convert the vector of expansion coefficients `v` from a Legendre to a Chebyshev basis.
+The keyword arguments denote whether the bases are normalized.
+"""
+leg2cheb
+
+"""
+	cheb2leg(v::AbstractVector; normcheb::Bool=false, normleg::Bool=false)
+
+Convert the vector of expansion coefficients `v` from a Chebyshev to a Legendre basis.
+The keyword arguments denote whether the bases are normalized.
+"""
+cheb2leg
+
+"""
+	ultra2ultra(v::AbstractVector, λ, μ; norm1::Bool=false, norm2::Bool=false)
+
+Convert the vector of expansion coefficients `v` from an Ultraspherical basis of
+order `λ` to an Ultraspherical basis of order `μ`.
+The keyword arguments denote whether the bases are normalized.
+"""
+ultra2ultra
+
+"""
+	jac2jac(v::AbstractVector, α, β, γ, δ; norm1::Bool=false, norm2::Bool=false)
+
+Convert the vector of expansion coefficients `v` from a Jacobi basis of
+order `(α,β)` to a Jacobi basis of order `(γ,δ)`.
+The keyword arguments denote whether the bases are normalized.
+"""
+jac2jac
+
+"""
+	lag2lag(v::AbstractVector, α, β; norm1::Bool=false, norm2::Bool=false)
+
+Convert the vector of expansion coefficients `v` from a Laguerre basis of
+order `α` to a Laguerre basis of order `β`.
+The keyword arguments denote whether the bases are normalized."""
+lag2lag
+
+"""
+	jac2ultra(v::AbstractVector, α, β, λ; normjac::Bool=false, normultra::Bool=false)
+
+Convert the vector of expansion coefficients `v` from a Jacobi basis of
+order `(α,β)` to an Ultraspherical basis of order `λ`.
+The keyword arguments denote whether the bases are normalized."""
+jac2ultra
+
+"""
+	ultra2jac(v::AbstractVector, λ, α, β; normultra::Bool=false, normjac::Bool=false)
+
+Convert the vector of expansion coefficients `v` from an Ultraspherical basis of
+order `λ` to a Jacobi basis of order `(α,β)`.
+The keyword arguments denote whether the bases are normalized.
+"""
+ultra2jac
+
+"""
+	jac2cheb(v::AbstractVector, α, β; normjac::Bool=false, normcheb::Bool=false)
+
+Convert the vector of expansion coefficients `v` from a Jacobi basis of
+order `(α,β)` to a Chebyshev basis.
+The keyword arguments denote whether the bases are normalized.
+"""
+jac2cheb
+
+"""
+	cheb2jac(v::AbstractVector, α, β; normcheb::Bool=false, normjac::Bool=false)
+
+Convert the vector of expansion coefficients `v` from a Chebyshev basis to a
+Jacobi basis of order `(α,β)`.
+The keyword arguments denote whether the bases are normalized.
+"""
+cheb2jac
+
+"""
+	ultra2cheb(v::AbstractVector, λ; normultra::Bool=false, normcheb::Bool=false)
+
+Convert the vector of expansion coefficients `v` from an Ultraspherical basis of
+order `λ` to a Chebyshev basis.
+The keyword arguments denote whether the bases are normalized.
+"""
+ultra2cheb
+
+"""
+	cheb2ultra(v::AbstractVector, λ; normcheb::Bool=false, normultra::Bool=false)
+
+Convert the vector of expansion coefficients `v` from a Chebyshev basis
+to an Ultraspherical basis of order `λ`.
+The keyword arguments denote whether the bases are normalized.
+"""
+cheb2ultra
+
+"""
+	associatedjac2jac(v::AbstractVector, c::Integer, α, β, γ, δ; norm1::Bool=false, norm2::Bool=false)
+
+Convert the vector of expansion coefficients `v` from an associated Jacobi basis
+of order `(α,β)` to a Jacobi basis of order `(γ,δ)`.
+The keyword arguments denote whether the bases are normalized.
+"""
+associatedjac2jac
+
+"""
+	modifiedjac2jac(v::AbstractVector{T}, α, β, u::Vector{T}; verbose::Bool=false) where {T}
+	modifiedjac2jac(v::AbstractVector{T}, α, β, u::Vector{T}, v::Vector{T}; verbose::Bool=false) where {T}
+"""
+modifiedjac2jac
+
+"""
+	modifiedlag2lag(v::AbstractVector{T}, α, u::Vector{T}; verbose::Bool=false) where {T}
+	modifiedlag2lag(v::AbstractVector{T}, α, u::Vector{T}, v::Vector{T}; verbose::Bool=false) where {T}
+"""
+modifiedlag2lag
+
+"""
+	modifiedherm2herm(v::AbstractVector{T}, u::Vector{T}; verbose::Bool=false) where {T}
+	modifiedherm2herm(v::AbstractVector{T}, u::Vector{T}, v::Vector{T}; verbose::Bool=false) where {T}
+"""
+modifiedherm2herm

From 198d22016aeca1d5cb260bedc8d8864612ee829e Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Fri, 10 Nov 2023 22:46:21 +0000
Subject: [PATCH 155/222] Support arbitrary tensors in Chebyshev U (#232)

* Support arbitrary tensors

* tests pass
---
 src/chebyshevtransform.jl | 100 ++++++++++++--------------------------
 test/chebyshevtests.jl    |  45 ++++++++++++++++-
 2 files changed, 74 insertions(+), 71 deletions(-)

diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 28d77eb7..01dd61af 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -423,19 +423,13 @@ plan_chebyshevutransform!(x::AbstractArray, dims...; kws...) = plan_chebyshevutr
 plan_chebyshevutransform(x::AbstractArray, dims...; kws...) = plan_chebyshevutransform(x, Val(1), dims...; kws...)
 
 
-@inline function _chebu1_prescale!(d::Number, x::AbstractVecOrMat{T}) where T
-    m,n = size(x,1),size(x,2)
-    if d == 1
-        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
-            x[k,j] *= sinpi(one(T)/(2m) + (k-one(T))/m)/m
-        end
-    else
-        @assert d == 2
-        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
-            x[k,j] *= sinpi(one(T)/(2n) + (j-one(T))/n)/n
-        end
-    end
-    x
+_permfirst(d, N) = [d; 1:d-1; d+1:N]
+
+@inline function _chebu1_prescale!(d::Number, X::AbstractArray{T,N}) where {T,N}
+    X̃ = PermutedDimsArray(X, _permfirst(d, N))
+    m = size(X̃,1)
+    X̃ .= (sinpi.(one(T)/(2m) .+ ((1:m) .- one(T))/m) ./ m) .* X̃
+    X
 end
 
 @inline function _chebu1_prescale!(d, y::AbstractArray)
@@ -445,19 +439,11 @@ end
     y
 end
 
-@inline function _chebu1_postscale!(d::Number, x::AbstractVecOrMat{T}) where T
-    m,n = size(x,1),size(x,2)
-    if d == 1
-        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
-            x[k,j] /= sinpi(one(T)/(2m) + (k-one(T))/m)/m
-        end
-    else
-        @assert d == 2
-        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
-            x[k,j] /= sinpi(one(T)/(2n) + (j-one(T))/n)/n
-        end
-    end
-    x
+@inline function _chebu1_postscale!(d::Number, X::AbstractArray{T,N}) where {T,N}
+    X̃ = PermutedDimsArray(X, _permfirst(d, N))
+    m = size(X̃,1)
+    X̃ .= X̃ ./ (sinpi.(one(T)/(2m) .+ ((1:m) .- one(T))/m) ./ m)
+    X
 end
 
 @inline function _chebu1_postscale!(d, y::AbstractArray)
@@ -485,21 +471,13 @@ function mul!(y::AbstractArray{T}, P::ChebyshevUTransformPlan{T,1,K,false}, x::A
     y
 end
 
-@inline function _chebu2_prescale!(d::Number, x::AbstractVecOrMat{T}) where T
-    m,n = size(x,1),size(x,2)
-    if d == 1
-        c = one(T)/ (m+1)
-        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
-            x[k,j] *= sinpi(k*c)
-        end
-    else
-        @assert d == 2
-        c = one(T)/ (n+1)
-        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
-            x[k,j] *= sinpi(j*c)
-        end
-    end
-    x
+
+@inline function _chebu2_prescale!(d::Number, X::AbstractArray{T,N}) where {T,N}
+    X̃ = PermutedDimsArray(X, _permfirst(d, N))
+    m = size(X̃,1)
+    c = one(T)/ (m+1)
+    X̃ .= sinpi.((1:m) .* c) .* X̃
+    X
 end
 
 @inline function _chebu2_prescale!(d, y::AbstractArray)
@@ -510,21 +488,12 @@ end
 end
 
 
-@inline function _chebu2_postscale!(d::Number, x::AbstractVecOrMat{T}) where T
-    m,n = size(x,1),size(x,2)
-    if d == 1
-        c = one(T)/ (m+1)
-        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
-            x[k,j] /= sinpi(k*c)
-        end
-    else
-        @assert d == 2
-        c = one(T)/ (n+1)
-        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
-            x[k,j] /= sinpi(j*c)
-        end
-    end
-    x
+@inline function _chebu2_postscale!(d::Number, X::AbstractArray{T,N}) where {T,N}
+    X̃ = PermutedDimsArray(X, _permfirst(d, N))
+    m = size(X̃,1)
+    c = one(T)/ (m+1)
+    X̃ .= X̃ ./ sinpi.((1:m) .* c)
+    X
 end
 
 @inline function _chebu2_postscale!(d, y::AbstractArray)
@@ -618,21 +587,14 @@ inv(P::IChebyshevUTransformPlan{T,2}) where {T} = ChebyshevUTransformPlan{T,2}(P
 inv(P::ChebyshevUTransformPlan{T,1}) where {T} = IChebyshevUTransformPlan{T,1}(inv(P.plan).p)
 inv(P::IChebyshevUTransformPlan{T,1}) where {T} = ChebyshevUTransformPlan{T,1}(inv(P.plan).p)
 
-@inline function _ichebu1_postscale!(d::Number, x::AbstractVecOrMat{T}) where T
-    m,n = size(x,1),size(x,2)
-    if d == 1
-        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
-            x[k,j] /= 2sinpi(one(T)/(2m) + (k-one(T))/m)
-        end
-    else
-        @assert d == 2
-        for j = 1:n, k = 1:m # sqrt(1-x_j^2) weight
-            x[k,j] /= 2sinpi(one(T)/(2n) + (j-one(T))/n)
-        end
-    end
-    x
+@inline function _ichebu1_postscale!(d::Number, X::AbstractArray{T,N}) where {T,N}
+    X̃ = PermutedDimsArray(X, _permfirst(d, N))
+    m = size(X̃,1)
+    X̃ .= X̃ ./ (2 .* sinpi.(one(T)/(2m) .+ ((1:m) .- one(T))/m))
+    X
 end
 
+
 @inline function _ichebu1_postscale!(d, y::AbstractArray)
     for k in d
         _ichebu1_postscale!(k, y)
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 82b8da5c..d6f704bf 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -164,11 +164,11 @@ using FastTransforms, Test
             gcopy = copy(g)
             P = @inferred(plan_chebyshevutransform(f))
             @test P*f ≈ g
-            @test f == fcopy
+            @test f ≈ fcopy
             @test_throws ArgumentError P * T[1,2]
             P = @inferred(plan_chebyshevutransform(f, 1:1))
             @test P*f ≈ g
-            @test f == fcopy
+            @test f ≈ fcopy
             @test_throws ArgumentError P * T[1,2]
 
             P = @inferred(plan_chebyshevutransform!(f))
@@ -364,6 +364,47 @@ using FastTransforms, Test
             @test ichebyshevtransform(chebyshevtransform(X)) ≈ X
             @test chebyshevtransform(ichebyshevtransform(X)) ≈ X
         end
+    
+        @testset "chebyshevutransform" begin
+            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevutransform(X[:,k,j]) end
+            @test @inferred(chebyshevutransform(X,1)) ≈ @inferred(chebyshevutransform!(copy(X),1)) ≈ X̃
+            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevutransform(X[k,:,j]) end
+            @test chebyshevutransform(X,2) ≈ chebyshevutransform!(copy(X),2) ≈ X̃
+            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevutransform(X[k,j,:]) end
+            @test chebyshevutransform(X,3) ≈ chebyshevutransform!(copy(X),3) ≈ X̃
+
+            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevutransform(X[:,k,j],Val(2)) end
+            @test @inferred(chebyshevutransform(X,Val(2),1)) ≈ @inferred(chebyshevutransform!(copy(X),Val(2),1)) ≈ X̃
+            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevutransform(X[k,:,j],Val(2)) end
+            @test chebyshevutransform(X,Val(2),2) ≈ chebyshevutransform!(copy(X),Val(2),2) ≈ X̃
+            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevutransform(X[k,j,:],Val(2)) end
+            @test chebyshevutransform(X,Val(2),3) ≈ chebyshevutransform!(copy(X),Val(2),3) ≈ X̃
+
+            @test @inferred(chebyshevutransform(X)) ≈ @inferred(chebyshevutransform!(copy(X))) ≈ chebyshevutransform(chebyshevutransform(chebyshevutransform(X,1),2),3)
+            @test @inferred(chebyshevutransform(X,Val(2))) ≈ @inferred(chebyshevutransform!(copy(X),Val(2))) ≈ chebyshevutransform(chebyshevutransform(chebyshevutransform(X,Val(2),1),Val(2),2),Val(2),3)
+        end
+
+        @testset "ichebyshevutransform" begin
+            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevutransform(X[:,k,j]) end
+            @test @inferred(ichebyshevutransform(X,1)) ≈ @inferred(ichebyshevutransform!(copy(X),1)) ≈ X̃
+            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevutransform(X[k,:,j]) end
+            @test ichebyshevutransform(X,2) ≈ ichebyshevutransform!(copy(X),2) ≈ X̃
+            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevutransform(X[k,j,:]) end
+            @test ichebyshevutransform(X,3) ≈ ichebyshevutransform!(copy(X),3) ≈ X̃
+
+            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevutransform(X[:,k,j],Val(2)) end
+            @test @inferred(ichebyshevutransform(X,Val(2),1)) ≈ @inferred(ichebyshevutransform!(copy(X),Val(2),1)) ≈ X̃
+            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevutransform(X[k,:,j],Val(2)) end
+            @test ichebyshevutransform(X,Val(2),2) ≈ ichebyshevutransform!(copy(X),Val(2),2) ≈ X̃
+            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevutransform(X[k,j,:],Val(2)) end
+            @test ichebyshevutransform(X,Val(2),3) ≈ ichebyshevutransform!(copy(X),Val(2),3) ≈ X̃
+
+            @test @inferred(ichebyshevutransform(X)) ≈ @inferred(ichebyshevutransform!(copy(X))) ≈ ichebyshevutransform(ichebyshevutransform(ichebyshevutransform(X,1),2),3)
+            @test @inferred(ichebyshevutransform(X,Val(2))) ≈ @inferred(ichebyshevutransform!(copy(X),Val(2))) ≈ ichebyshevutransform(ichebyshevutransform(ichebyshevutransform(X,Val(2),1),Val(2),2),Val(2),3)
+
+            @test ichebyshevutransform(chebyshevutransform(X)) ≈ X
+            @test chebyshevutransform(ichebyshevutransform(X)) ≈ X
+        end
 
         X = randn(1,1,1)
         @test chebyshevtransform!(copy(X), Val(1)) == ichebyshevtransform!(copy(X), Val(1)) == X

From dcaf8f6338a35f0f0eb512dd5d5f323178dbc179 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Sun, 12 Nov 2023 21:45:18 +0000
Subject: [PATCH 156/222] Support higher order tensors in ToeplitzHankel (#233)

* Support higher order tensors in ToeplitzHankel

* Update toeplitzhankel.jl

* Generalise ToeplitzPlan

* generalise plan_upper for matrices

* consolidate plan_uppertoeplitz

* 4-tensor tests

* Update toeplitzplans.jl

* generalise _th_applymul!

* Tensor leg2cheb

* tests pass

* cheb2leg tensor support

* fixes

* v0.15.12

* Update toeplitzplans.jl
---
 Project.toml                |   2 +-
 src/FastTransforms.jl       |   2 +-
 src/toeplitzhankel.jl       | 136 +++++++--------------
 src/toeplitzplans.jl        | 231 ++++++++++--------------------------
 test/toeplitzhankeltests.jl |  33 ++++++
 test/toeplitzplanstests.jl  |  91 +++++++++++---
 6 files changed, 215 insertions(+), 280 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3de40b67..7b29f896 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.11"
+version = "0.15.12"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index f9d99e74..e9bf6fac 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -8,7 +8,7 @@ using FastGaussQuadrature, FillArrays, LinearAlgebra,
 @reexport using GenericFFT
 
 import Base: convert, unsafe_convert, eltype, ndims, adjoint, transpose, show,
-             *, \, inv, length, size, view, getindex
+             *, \, inv, length, size, view, getindex, tail, OneTo
 
 import Base.GMP: Limb
 
diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index 4e0ab3db..fef934f6 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -16,75 +16,42 @@ so that `L[:,k] = DL*C[:,k]` and `R[:,k] = DR*C[:,k]`.
 This allows a Cholesky decomposition in 𝒪(K²N) operations and 𝒪(KN) storage, K = log N log ɛ⁻¹.
 The tuple storage allows plans applied to each dimension.
 """
-struct ToeplitzHankelPlan{S, N, M, N1, TP<:ToeplitzPlan{S,N1}} <: Plan{S}
-    T::NTuple{M,TP}
-    L::NTuple{M,Matrix{S}}
-    R::NTuple{M,Matrix{S}}
-    tmp::Array{S,N1}
-    dims::NTuple{M,Int}
-    function ToeplitzHankelPlan{S,N,M,N1,TP}(T::NTuple{M,TP}, L, R, dims) where {S,TP,N,N1,M}
+struct ToeplitzHankelPlan{S, N, N1, LowR, TP, Dims} <: Plan{S}
+    T::TP # A length M Vector or Tuple of ToeplitzPlan
+    L::LowR  # A length M Vector or Tuple of Matrices storing low rank factors of L
+    R::LowR # A length M Vector or Tuple of Matrices storing low rank factors of R
+    tmp::Array{S,N1} # A larger dimensional array to transform each scaled array all-at-once
+    dims::Dims # A length M Vector or Tuple of Int storing the dimensions acted on
+    function ToeplitzHankelPlan{S,N,N1,LowR,TP,Dims}(T::TP, L::LowR, R::LowR, dims) where {S,N,N1,LowR,TP,Dims}
         tmp = Array{S}(undef, max.(size.(T)...)...)
-        new{S,N,M,N1,TP}(T, L, R, tmp, dims)
+        new{S,N,N1,LowR,TP,Dims}(T, L, R, tmp, dims)
     end
-    ToeplitzHankelPlan{S,N,M,N1,TP}(T::NTuple{M,TP}, L, R, dims::Int) where {S,TP,N,N1,M} =
-        ToeplitzHankelPlan{S,N,M,N1,TP}(T, L, R, (dims,))
 end
 
-ToeplitzHankelPlan(T::ToeplitzPlan{S,2}, L::Matrix, R::Matrix, dims=1) where S =
-    ToeplitzHankelPlan{S, 1, 1, 2, typeof(T)}((T,), (L,), (R,), dims)
 
-ToeplitzHankelPlan(T::ToeplitzPlan{S,3}, L::Matrix, R::Matrix, dims) where S =
-    ToeplitzHankelPlan{S, 2, 1,3, typeof(T)}((T,), (L,), (R,), dims)
+ToeplitzHankelPlan{S,N,M}(T::TP, L::LowR, R::LowR, dims::Dims) where {S,N,M,LowR,TP,Dims} = ToeplitzHankelPlan{S,N,M,LowR,TP,Dims}(T, L, R, dims)
+ToeplitzHankelPlan{S,N}(T, L, R, dims) where {S,N} = ToeplitzHankelPlan{S,N,N+1}(T, L, R, dims)
+ToeplitzHankelPlan(T::ToeplitzPlan{S,M}, L::Matrix, R::Matrix, dims=1) where {S,M} = ToeplitzHankelPlan{S,M-1,M}((T,), (L,), (R,), dims)
 
-ToeplitzHankelPlan(T::NTuple{2,TP}, L::Tuple, R::Tuple, dims) where {S,TP<:ToeplitzPlan{S,3}} =
-    ToeplitzHankelPlan{S, 2,2,3, TP}(T, L, R, dims)
 
-
-function *(P::ToeplitzHankelPlan{<:Any,1}, v::AbstractVector)
-    (R,),(L,),(T,),tmp = P.R,P.L,P.T,P.tmp
-    tmp .= R .* v
-    T * tmp
-    tmp .= L .* tmp
-    sum!(v, tmp)
-end
-
-function _th_applymul1!(v, T, L, R, tmp)
-    N = size(R,2)
-    m,n = size(v)
-    tmp[1:m,1:n,1:N] .=  reshape(R,size(R,1),1,N) .* v
-    T * view(tmp,1:m,1:n,1:N)
-    view(tmp,1:m,1:n,1:N) .*=  reshape(L,size(L,1),1,N)
-    sum!(v, view(tmp,1:m,1:n,1:N))
-end
-
-function _th_applymul2!(v, T, L, R, tmp)
-    N = size(R,2)
-    m,n = size(v)
-    tmp[1:m,1:n,1:N] .=  reshape(R,1,size(R,1),N) .* v
-    T * view(tmp,1:m,1:n,1:N)
-    view(tmp,1:m,1:n,1:N) .*=  reshape(L,1,size(L,1),N)
-    sum!(v, view(tmp,1:m,1:n,1:N))
+_reshape_broadcast(d, R, ::Val{N}, M) where N = reshape(R,ntuple(k -> k == d ? size(R,1) : 1, Val(N))...,M)
+function _th_applymul!(d, v::AbstractArray{<:Any,N}, T, L, R, tmp) where N
+    M = size(R,2)
+    ax = (axes(v)..., OneTo(M))
+    tmp[ax...] .=  _reshape_broadcast(d, R, Val(N), M) .* v
+    T * view(tmp, ax...)
+    view(tmp,ax...) .*= _reshape_broadcast(d, L, Val(N), M)
+    sum!(v, view(tmp,ax...))
 end
 
 
-function *(P::ToeplitzHankelPlan{<:Any,2,1}, v::AbstractMatrix)
-    (R,),(L,),(T,),tmp = P.R,P.L,P.T,P.tmp
-    if P.dims == (1,)
-        _th_applymul1!(v, T, L, R, tmp)
-    else
-        _th_applymul2!(v, T, L, R, tmp)
+function *(P::ToeplitzHankelPlan{<:Any,N}, v::AbstractArray{<:Any,N}) where N
+    for (R,L,T,d) in zip(P.R,P.L,P.T,P.dims)
+        _th_applymul!(d, v, T, L, R, P.tmp)
     end
     v
 end
 
-function *(P::ToeplitzHankelPlan{<:Any,2,2}, v::AbstractMatrix)
-    (R1,R2),(L1,L2),(T1,T2),tmp = P.R,P.L,P.T,P.tmp
-
-    _th_applymul1!(v, T1, L1, R1, tmp)
-    _th_applymul2!(v, T2, L2, R2, tmp)
-
-    v
-end
 
 # partial cholesky for a Hankel matrix
 
@@ -166,9 +133,9 @@ function *(P::ChebyshevToLegendrePlanTH, v::AbstractVector{S}) where S
     v
 end
 
-function _cheb2leg_rescale1!(V::AbstractMatrix{S}) where S
-    m,n = size(V)
-    for j = 1:n
+function _cheb2leg_rescale1!(V::AbstractArray{S}) where S
+    m = size(V,1)
+    for j = CartesianIndices(tail(axes(V)))
         ret = zero(S)
         @inbounds for k = 1:2:m
             ret += -V[k,j]/(k*(k-2))
@@ -178,24 +145,15 @@ function _cheb2leg_rescale1!(V::AbstractMatrix{S}) where S
     V
 end
 
+_dropfirstdim(d::Int) = ()
+_dropfirstdim(d::Int, m, szs...) = ((d == 1 ? 2 : 1):m, _dropfirstdim(d-1, szs...)...)
 
-function *(P::ChebyshevToLegendrePlanTH, V::AbstractMatrix)
+function *(P::ChebyshevToLegendrePlanTH, V::AbstractArray{<:Any,N}) where N
     m,n = size(V)
-    dims = P.toeplitzhankel.dims
-    if dims == (1,)
-        _cheb2leg_rescale1!(V)
-        P.toeplitzhankel*view(V,2:m,:)
-    elseif dims == (2,)
-        _cheb2leg_rescale1!(transpose(V))
-        P.toeplitzhankel*view(V,:,2:n)
-    else
-        @assert dims == (1,2)
-        (R1,R2),(L1,L2),(T1,T2),tmp = P.toeplitzhankel.R,P.toeplitzhankel.L,P.toeplitzhankel.T,P.toeplitzhankel.tmp
-
-        _cheb2leg_rescale1!(V)
-        _th_applymul1!(view(V,2:m,:), T1, L1, R1, tmp)
-        _cheb2leg_rescale1!(transpose(V))
-        _th_applymul2!(view(V,:,2:n), T2, L2, R2, tmp)
+    tmp = P.toeplitzhankel.tmp
+    for (d,R,L,T) in zip(P.toeplitzhankel.dims,P.toeplitzhankel.R,P.toeplitzhankel.L,P.toeplitzhankel.T)
+        _cheb2leg_rescale1!(PermutedDimsArray(V, _permfirst(d, N)))
+        _th_applymul!(d, view(V, _dropfirstdim(d, size(V)...)...), T, L, R, tmp)
     end
     V
 end
@@ -226,18 +184,14 @@ function _leg2chebuTH_TLC(::Type{S}, mn, d) where {S}
     (T, (1:n) .* C, C)
 end
 
-
 for f in (:leg2cheb, :leg2chebu)
     plan = Symbol("plan_th_", f, "!")
     TLC = Symbol("_", f, "TH_TLC")
     @eval begin
-        $plan(::Type{S}, mn::Tuple, dims::Int) where {S} = ToeplitzHankelPlan($TLC(S, mn, dims)..., dims)
-
-        function $plan(::Type{S}, mn::NTuple{2,Int}, dims::NTuple{2,Int}) where {S}
-            @assert dims == (1,2)
-            T1,L1,C1 = $TLC(S, mn, 1)
-            T2,L2,C2 = $TLC(S, mn, 2)
-            ToeplitzHankelPlan((T1,T2), (L1,L2), (C1,C2), dims)
+        $plan(::Type{S}, mn::NTuple{N,Int}, dims::Int) where {S,N} = ToeplitzHankelPlan($TLC(S, mn, dims)..., dims)
+        function $plan(::Type{S}, mn::NTuple{N,Int}, dims) where {S,N}
+            TLCs = $TLC.(S, Ref(mn), dims)
+            ToeplitzHankelPlan{S,N}(map(first, TLCs), map(TLC -> TLC[2], TLCs), map(last, TLCs), dims)
         end
     end
 end
@@ -265,13 +219,11 @@ function _cheb2legTH_TLC(::Type{S}, mn, d) where S
     T, DL .* C, DR .* C
 end
 
-plan_th_cheb2leg!(::Type{S}, mn::Tuple, dims::Int) where {S} = ChebyshevToLegendrePlanTH(ToeplitzHankelPlan(_cheb2legTH_TLC(S, mn, dims)..., dims))
+plan_th_cheb2leg!(::Type{S}, mn::NTuple{N,Int}, dims::Int) where {S,N} = ChebyshevToLegendrePlanTH(ToeplitzHankelPlan(_cheb2legTH_TLC(S, mn, dims)..., dims))
 
-function plan_th_cheb2leg!(::Type{S}, mn::NTuple{2,Int}, dims::NTuple{2,Int}) where {S}
-    @assert dims == (1,2)
-    T1,L1,C1 = _cheb2legTH_TLC(S, mn, 1)
-    T2,L2,C2 = _cheb2legTH_TLC(S, mn, 2)
-    ChebyshevToLegendrePlanTH(ToeplitzHankelPlan((T1,T2), (L1,L2), (C1,C2), dims))
+function plan_th_cheb2leg!(::Type{S}, mn::NTuple{N,Int}, dims) where {S,N}
+    TLCs = _cheb2legTH_TLC.(S, Ref(mn), dims)
+    ChebyshevToLegendrePlanTH(ToeplitzHankelPlan{S,N}(map(first, TLCs), map(TLC -> TLC[2], TLCs), map(last, TLCs), dims))
 end
 
 
@@ -337,7 +289,7 @@ _good_plan_th_ultra2ultra!(::Type{S}, mn, λ₁, λ₂, dims::Int) where S = Toe
 function _good_plan_th_ultra2ultra!(::Type{S}, mn::NTuple{2,Int}, λ₁, λ₂, dims::NTuple{2,Int}) where S
     T1,L1,C1 = _ultra2ultraTH_TLC(S, mn, λ₁, λ₂, 1)
     T2,L2,C2 = _ultra2ultraTH_TLC(S, mn, λ₁, λ₂, 2)
-    ToeplitzHankelPlan((T1,T2), (L1,L2), (C1,C2), dims)
+    ToeplitzHankelPlan{S,2}((T1,T2), (L1,L2), (C1,C2), dims)
 end
 
 
@@ -515,7 +467,7 @@ _good_plan_th_jac2jac!(::Type{S}, mn, α, β, γ, δ, dims::Int) where S = Toepl
 function _good_plan_th_jac2jac!(::Type{S}, mn::NTuple{2,Int}, α, β, γ, δ, dims::NTuple{2,Int}) where S
     T1,L1,C1 = _jac2jacTH_TLC(S, mn, α, β, γ, δ, 1)
     T2,L2,C2 = _jac2jacTH_TLC(S, mn, α, β, γ, δ, 2)
-    ToeplitzHankelPlan((T1,T2), (L1,L2), (C1,C2), dims)
+    ToeplitzHankelPlan{S,2}((T1,T2), (L1,L2), (C1,C2), dims)
 end
 
 
@@ -685,10 +637,8 @@ end
 for f in (:th_leg2cheb, :th_cheb2leg, :th_leg2chebu)
     plan = Symbol("plan_", f, "!")
     @eval begin
-        $plan(::Type{S}, mn::NTuple{N,Int}, dims::UnitRange) where {N,S} = $plan(S, mn, tuple(dims...))
-        $plan(::Type{S}, mn::Tuple{Int}, dims::Tuple{Int}=(1,)) where {S} = $plan(S, mn, dims...)
-        $plan(::Type{S}, (m,n)::NTuple{2,Int}) where {S} = $plan(S, (m,n), (1,2))
         $plan(arr::AbstractArray{T}, dims...) where T = $plan(T, size(arr), dims...)
+        $plan(::Type{S}, mn::NTuple{N,Int}) where {S,N} = $plan(S, mn, ntuple(identity,Val(N)))
         $f(v, dims...) = $plan(eltype(v), size(v), dims...)*copy(v)
     end
 end
diff --git a/src/toeplitzplans.jl b/src/toeplitzplans.jl
index ab08d1f7..42d24062 100644
--- a/src/toeplitzplans.jl
+++ b/src/toeplitzplans.jl
@@ -1,39 +1,46 @@
 using FFTW
 import FFTW: plan_r2r!
 
-struct ToeplitzPlan{T, N, M, S, VECS<:Tuple{Vararg{Vector{S}}}, P<:Plan{S}, Pi<:Plan{S}} <: Plan{T}
-    vectors::VECS
+
+"""
+    ToeplitzPlan
+
+applies Toeplitz matrices fast along each dimension.
+"""
+
+struct ToeplitzPlan{T, N, Dims, S, VECS, P<:Plan{S}, Pi<:Plan{S}} <: Plan{T}
+    vectors::VECS # Vector or Tuple of storage
     tmp::Array{S,N}
     dft::P
     idft::Pi
-    dims::NTuple{M,Int}
+    dims::Dims
 end
 
-ToeplitzPlan{T}(v::AbstractVector, tmp, dft, idft, dims) where T = ToeplitzPlan{T}((v,), tmp, dft, idft, dims)
-ToeplitzPlan{T}(v::Tuple{Vararg{Vector{S}}}, tmp::Array{S,N}, dft::Plan{S}, idft::Plan{S}, dims::NTuple{M,Int}) where {T,S,N,M} = ToeplitzPlan{T,N,M,S,typeof(v),typeof(dft), typeof(idft)}(v, tmp, dft, idft, dims)
-ToeplitzPlan{T}(v::Tuple{Vararg{Vector{S}}}, tmp::Array{S,N}, dft::Plan{S}, idft::Plan{S}, dims::Int) where {T,S,N} = ToeplitzPlan{T}(v, tmp, dft, idft, (dims,))
+ToeplitzPlan{T}(v, tmp::Array{S,N}, dft::Plan{S}, idft::Plan{S}, dims) where {T,S,N} = ToeplitzPlan{T,N,typeof(dims),S,typeof(v),typeof(dft), typeof(idft)}(v, tmp, dft, idft, dims)
+
 
-size(A::ToeplitzPlan{<:Any,1}) = ((length(A.tmp)+1) ÷ 2,)
-function size(A::ToeplitzPlan{<:Any,2,1})
-    if A.dims == (1,)
-        ((size(A.tmp,1)+1) ÷ 2, size(A.tmp,2))
-    else # A.dims == (2,)
-        (size(A.tmp,1), (size(A.tmp,2)+1) ÷ 2)
+divdimby2(d::Int, sz1, szs...) = isone(d) ? ((sz1 + 1) ÷ 2, szs...) : (sz1, divdimby2(d-1, szs...)...)
+muldimby2(d::Int, sz1, szs...) = isone(d) ? (max(0,2sz1 - 1), szs...) : (sz1, muldimby2(d-1, szs...)...)
+
+function toeplitzplan_size(dims, szs)
+    ret = szs
+    for d in dims
+        ret = divdimby2(d, ret...)
     end
+    ret
 end
 
-function size(A::ToeplitzPlan{<:Any,3,1})
-    if A.dims == (1,)
-        ((size(A.tmp,1)+1) ÷ 2, size(A.tmp,2), size(A.tmp,3))
-    elseif A.dims == (2,)
-        (size(A.tmp,1), (size(A.tmp,2)+1) ÷ 2, size(A.tmp,3))
-    else
-        (size(A.tmp,1), size(A.tmp,2), (size(A.tmp,3)+1) ÷ 2)
+function to_toeplitzplan_size(dims, szs)
+    ret = szs
+    for d in dims
+        ret = muldimby2(d, ret...)
     end
+    ret
 end
 
 
-size(A::ToeplitzPlan{<:Any,2,2}) = ((size(A.tmp,1)+1) ÷ 2, (size(A.tmp,2)+1) ÷ 2)
+size(A::ToeplitzPlan) = toeplitzplan_size(A.dims, size(A.tmp))
+
 
 # based on ToeplitzMatrices.jl
 """
@@ -44,101 +51,31 @@ Return real-valued part of `x` if `T` is a type of a real number, and `x` otherw
 maybereal(::Type, x) = x
 maybereal(::Type{<:Real}, x) = real(x)
 
-function *(A::ToeplitzPlan{T,1}, x::AbstractVector{T}) where T
-    vc,tmp,dft,idft = A.vectors[1],A.tmp, A.dft,A.idft
-    S = eltype(tmp)
-    N = length(tmp)
-    n = length(x)
-    if 2n-1 ≠ N
-        throw(DimensionMismatch("Toeplitz plan does not match size of input"))
-    end
-    copyto!(view(tmp, 1:n), x)
-    fill!(view(tmp, n+1:N), zero(S))
-    dft * tmp
-    tmp .*= vc
-    idft * tmp
-    @inbounds for k = 1:n
-        x[k] = maybereal(T, tmp[k])
-    end
-    x
-end
+function *(A::ToeplitzPlan{T,N}, X::AbstractArray{T,N}) where {T,N}
+    vcs,Y,dft,idft,dims = A.vectors,A.tmp, A.dft,A.idft,A.dims
 
-function *(A::ToeplitzPlan{T,2,1, S}, x::AbstractMatrix{T}) where {T,S}
-    vc,tmp,dft,idft = A.vectors[1],A.tmp, A.dft, A.idft
-    M,N = size(tmp)
-    m,n = size(x)
+    isempty(X) && return X
 
-    if isempty(x)
-        return x
-    end
+    fill!(Y, zero(eltype(Y)))
+    copyto!(view(Y, axes(X)...), X)
 
-    if A.dims == (1,)
-        copyto!(view(tmp, 1:m, :), x)
-        fill!(view(tmp, m+1:M, :), zero(S))
-        if !isempty(tmp)
-            dft * tmp
-        end
-        tmp .= vc .* tmp
-    else
-        @assert A.dims == (2,)
-        copyto!(view(tmp, :, 1:n), x)
-        fill!(view(tmp, :, n+1:N), zero(S))
-        dft * tmp
-        tmp .= tmp .* transpose(vc)
+    # Fourier transform each dimension
+    dft * Y
+    
+    # Multiply by a diagonal matrix along each dimension by permuting
+    # to first dimension
+    for (vc,d) in zip(vcs,dims)
+        Ỹ = PermutedDimsArray(Y, _permfirst(d, N))
+        Ỹ .= vc .* Ỹ
     end
-    idft * tmp
-    x .= maybereal.(T, view(tmp,1:m,1:n))
-end
 
+    # Transform back
+    idft * Y
 
-function *(A::ToeplitzPlan{T,2,2, S}, X::AbstractMatrix{T}) where {T,S}
-    vcs,tmp,dft,idft = A.vectors,A.tmp, A.dft,A.idft
-    vc1,vc2 = vcs
-    M,N = size(tmp)
-    m,n = size(X)
-
-    @assert A.dims == (1,2)
-    copyto!(view(tmp, 1:m, 1:n), X)
-    fill!(view(tmp, m+1:M, :), zero(S))
-    fill!(view(tmp, 1:m, n+1:N), zero(S))
-    dft * tmp
-    tmp .= vc1 .* tmp .* transpose(vc2)
-    idft * tmp
-    @inbounds for k = 1:m, j = 1:n
-        X[k,j] = maybereal(T, tmp[k,j])
-    end
+    X .= maybereal.(T, view(Y, axes(X)...))
     X
 end
 
-function *(A::ToeplitzPlan{T,3,1, S}, x::AbstractArray{T,3}) where {T,S}
-    vc,tmp,dft,idft = A.vectors[1],A.tmp, A.dft,A.idft
-    M,N,L = size(tmp)
-    m,n,l = size(x)
-
-    if A.dims == (1,)
-        copyto!(view(tmp, 1:m, :, :), x)
-        fill!(view(tmp, m+1:M, :, :), zero(S))
-        dft * tmp
-        tmp .= vc .* tmp
-    elseif A.dims == (2,)
-        copyto!(view(tmp, :, 1:n, :), x)
-        fill!(view(tmp, :, n+1:N, :), zero(S))
-        dft * tmp
-        tmp .= tmp .* transpose(vc)
-    else
-        copyto!(view(tmp, :, :, 1:l), x)
-        fill!(view(tmp, :, :, l+1:L), zero(S))
-        dft * tmp
-        tmp .= tmp .* reshape(vc, 1, 1, L)
-    end
-    idft * tmp
-    @inbounds for k = 1:m, j = 1:n, ℓ = 1:l
-        x[k,j,ℓ] = maybereal(T, tmp[k,j,ℓ])
-    end
-    x
-end
-
-
 
 function uppertoeplitz_padvec(v::AbstractVector{T}) where T
     n = length(v)
@@ -151,73 +88,25 @@ function uppertoeplitz_padvec(v::AbstractVector{T}) where T
     tmp
 end
 
-function plan_uppertoeplitz!(v::AbstractVector{T}) where T
-    tmp = uppertoeplitz_padvec(v)
-    dft = plan_fft!(tmp)
-    idft = plan_ifft!(similar(tmp))
-    return ToeplitzPlan{float(T)}(dft * tmp, similar(tmp), dft, idft, (1,))
-end
+safe_fft!(A) = isempty(A) ? A : fft!(A)
 
-# TODO: support different transforms
-# function plan_uppertoeplitz!(v1::AbstractVector{T}, v2::AbstractVector{T}) where T
-#     S = float(T)
-#     m,n = length(v1), length(v2)
-#     tmp = zeros(S, 2m-1, 2n-1)
-#     pv1 = uppertoeplitz_padvec(v1)
-#     pv2 = uppertoeplitz_padvec(v2)
-#     dft = plan_r2r!(tmp, FFTW.R2HC)
-#     return ToeplitzPlan((r2r!(pv1, FFTW.R2HC), r2r!(pv2, FFTW.R2HC)), tmp, dft, 1:2)
-# end
-
-function plan_uppertoeplitz!(v::AbstractVector{T}, szs::NTuple{2,Int}, dim::Int) where T
-    S = complex(float(T))
-    m,n = szs
-    if isone(dim)
-        tmp = zeros(S, max(0,2m-1), n)
-        pv = uppertoeplitz_padvec(v[1:m])
-    else # dim == 2
-        tmp = zeros(S, m, max(0,2n-1))
-        pv = uppertoeplitz_padvec(v[1:n])
-    end
-    if isempty(tmp)
-        # dummy plans just to create type
-        dft = plan_fft!(similar(tmp, 1, 1), dim)
-        idft = plan_ifft!(similar(tmp, 1, 1), dim)
-        ToeplitzPlan{float(T)}(pv, tmp, dft, idft, dim)
-    else
-        dft = plan_fft!(tmp, dim)
-        idft = plan_ifft!(similar(tmp), dim)
-        return ToeplitzPlan{float(T)}(fft!(pv), tmp, dft, idft, dim)
-    end
-end
+uppertoeplitz_vecs(v, dims::AbstractVector, szs) = [safe_fft!(uppertoeplitz_padvec(v[1:szs[d]])) for d in dims]
+uppertoeplitz_vecs(v, dims::Tuple{}, szs) = ()
+uppertoeplitz_vecs(v, dims::Tuple, szs) = (safe_fft!(uppertoeplitz_padvec(v[1:szs[first(dims)]])), uppertoeplitz_vecs(v, tail(dims), szs)...)
+uppertoeplitz_vecs(v, d::Int, szs) = (safe_fft!(uppertoeplitz_padvec(v[1:szs[d]])),)
 
-function plan_uppertoeplitz!(v::AbstractVector{T}, szs::NTuple{3,Int}, dim::Int) where T
-    S = complex(float(T))
-    m,n,l = szs
-    if isone(dim)
-        tmp = zeros(S, 2m-1, n, l)
-        pv = uppertoeplitz_padvec(v[1:m])
-    elseif dim == 2
-        tmp = zeros(S, m, 2n-1, l)
-        pv = uppertoeplitz_padvec(v[1:n])
-    else
-        @assert dim == 3
-        tmp = zeros(S, m, n, 2l-1)
-        pv = uppertoeplitz_padvec(v[1:l])
-    end
-    dft = plan_fft!(tmp, dim)
-    idft = plan_ifft!(similar(tmp), dim)
-    return ToeplitzPlan{float(T)}(fft!(pv), tmp, dft, idft, dim)
-end
 
-function plan_uppertoeplitz!(v::AbstractVector{T}, szs::NTuple{2,Int}, dim=(1,2)) where T
-    @assert dim == (1,2)
+# allow FFT to work by making sure tmp is non-empty
+safe_tmp(tmp::AbstractArray{<:Any,N}) where N = isempty(tmp) ? similar(tmp, ntuple(_ -> 1, Val(N))...) : tmp
+
+function plan_uppertoeplitz!(v::AbstractVector{T}, szs::NTuple{N,Int}, dim=ntuple(identity,Val(N))) where {T,N}
     S = complex(float(T))
-    m,n = szs
-    tmp = zeros(S, 2m-1, 2n-1)
-    pv1 = uppertoeplitz_padvec(v[1:m])
-    pv2 = uppertoeplitz_padvec(v[1:n])
-    dft = plan_fft!(tmp, dim)
-    idft = plan_ifft!(similar(tmp), dim)
-    return ToeplitzPlan{float(T)}((fft!(pv1), fft!(pv2)), tmp, dft, idft, dim)
+    
+    tmp = zeros(S, to_toeplitzplan_size(dim, szs)...)
+    dft = plan_fft!(safe_tmp(tmp), dim)
+    idft = plan_ifft!(safe_tmp(similar(tmp)), dim)
+    
+    return ToeplitzPlan{float(T)}(uppertoeplitz_vecs(v, dim, szs), tmp, dft, idft, dim)
 end
+
+plan_uppertoeplitz!(v::AbstractVector{T}) where T = plan_uppertoeplitz!(v, size(v))
diff --git a/test/toeplitzhankeltests.jl b/test/toeplitzhankeltests.jl
index 19274747..b90ce94b 100644
--- a/test/toeplitzhankeltests.jl
+++ b/test/toeplitzhankeltests.jl
@@ -130,4 +130,37 @@ Random.seed!(0)
         @test norm(v - th_cheb2leg(th_leg2cheb(v)), Inf) ≤ 1E-13
         @test norm(v - th_cheb2leg(th_leg2cheb(v)))/norm(v) ≤ 1E-14
     end
+
+    @testset "tensor" begin
+        X = randn(5,4,3)
+        for trans in (th_leg2cheb, th_cheb2leg)
+            Y = trans(X, 1)
+            for ℓ = 1:size(X,3)
+                @test Y[:,:,ℓ] ≈ trans(X[:,:,ℓ],1)
+            end
+            Y = trans(X, 2)
+            for ℓ = 1:size(X,3)
+                @test Y[:,:,ℓ] ≈ trans(X[:,:,ℓ],2)
+            end
+            Y = trans(X, 3)
+            for j = 1:size(X,2)
+                @test Y[:,j,:] ≈ trans(X[:,j,:],2)
+            end
+
+            Y = trans(X, (1,3))
+            for j = 1:size(X,2)
+                @test Y[:,j,:] ≈ trans(X[:,j,:])
+            end 
+
+            Y = trans(X, 1:3)
+            M = copy(X)
+            for j = 1:size(X,3)
+                M[:,:,j] = trans(M[:,:,j])
+            end
+            for k = 1:size(X,1), j=1:size(X,2)
+                M[k,j,:] = trans(M[k,j,:])
+            end
+            @test M ≈ Y
+        end
+    end
 end
\ No newline at end of file
diff --git a/test/toeplitzplanstests.jl b/test/toeplitzplanstests.jl
index e56d8c3e..6ea6a095 100644
--- a/test/toeplitzplanstests.jl
+++ b/test/toeplitzplanstests.jl
@@ -34,23 +34,86 @@ import FastTransforms: plan_uppertoeplitz!
     @testset "Tensor" begin
         T = [1 2 3; 0 1 2; 0 0 1]
         
-        X = randn(3,3,3)
-        P = plan_uppertoeplitz!([1,2,3], size(X), 1)
-        PX = P * copy(X)
-        for ℓ = 1:size(X,3)
-            @test PX[:,:,ℓ] ≈ T*X[:,:,ℓ]
-        end
+        @testset "3D" begin
+            X = randn(3,3,3)
+            P = plan_uppertoeplitz!([1,2,3], size(X), 1)
+            PX = P * copy(X)
+            for ℓ = 1:size(X,3)
+                @test PX[:,:,ℓ] ≈ T*X[:,:,ℓ]
+            end
 
-        P = plan_uppertoeplitz!([1,2,3], size(X), 2)
-        PX = P * copy(X)
-        for ℓ = 1:size(X,3)
-            @test PX[:,:,ℓ] ≈ X[:,:,ℓ]*T'
+            P = plan_uppertoeplitz!([1,2,3], size(X), 2)
+            PX = P * copy(X)
+            for ℓ = 1:size(X,3)
+                @test PX[:,:,ℓ] ≈ X[:,:,ℓ]*T'
+            end
+
+            P = plan_uppertoeplitz!([1,2,3], size(X), 3)
+            PX = P * copy(X)
+            for j = 1:size(X,2)
+                @test PX[:,j,:] ≈ X[:,j,:]*T'
+            end
+
+            P = plan_uppertoeplitz!([1,2,3], size(X), (1,3))
+            PX = P * copy(X)
+            for j = 1:size(X,2)
+                @test PX[:,j,:] ≈ T*X[:,j,:]*T'
+            end
+
+            P = plan_uppertoeplitz!([1,2,3], size(X), 1:3)
+            PX = P * copy(X)
+            M = copy(X)
+            for j = 1:size(X,3)
+                M[:,:,j] = T*M[:,:,j]*T'
+            end
+            for k = 1:size(X,1)
+                M[k,:,:] = M[k,:,:]*T'
+            end
+            @test M ≈ PX
         end
 
-        P = plan_uppertoeplitz!([1,2,3], size(X), 3)
-        PX = P * copy(X)
-        for j = 1:size(X,2)
-            @test PX[:,j,:] ≈ X[:,j,:]*T'
+        @testset "4D" begin
+            X = randn(3,3,3,3)
+            P = plan_uppertoeplitz!([1,2,3], size(X), 1)
+            PX = P * copy(X)
+            for ℓ = 1:size(X,3), m = 1:size(X,4)
+                @test PX[:,:,ℓ,m] ≈ T*X[:,:,ℓ,m]
+            end
+
+            P = plan_uppertoeplitz!([1,2,3], size(X), 2)
+            PX = P * copy(X)
+            for ℓ = 1:size(X,3), m = 1:size(X,4)
+                @test PX[:,:,ℓ,m] ≈ X[:,:,ℓ,m]*T'
+            end
+
+            P = plan_uppertoeplitz!([1,2,3], size(X), 3)
+            PX = P * copy(X)
+            for j = 1:size(X,2), m = 1:size(X,4)
+                @test PX[:,j,:,m] ≈ X[:,j,:,m]*T'
+            end
+
+            P = plan_uppertoeplitz!([1,2,3], size(X), 4)
+            PX = P * copy(X)
+            for k = 1:size(X,1), j = 1:size(X,2)
+                @test PX[k,j,:,:] ≈ X[k,j,:,:]*T'
+            end
+
+            P = plan_uppertoeplitz!([1,2,3], size(X), (1,3))
+            PX = P * copy(X)
+            for j = 1:size(X,2), m=1:size(X,4)
+                @test PX[:,j,:,m] ≈ T*X[:,j,:,m]*T'
+            end
+
+            P = plan_uppertoeplitz!([1,2,3], size(X), 1:4)
+            PX = P * copy(X)
+            M = copy(X)
+            for ℓ = 1:size(X,3), m = 1:size(X,4)
+                M[:,:,ℓ,m] = T*M[:,:,ℓ,m]*T'
+            end
+            for k = 1:size(X,1), j = 1:size(X,2)
+                M[k,j,:,:] = T*M[k,j,:,:]*T'
+            end
+            @test M ≈ PX
         end
     end
 

From 90d1daefccff78ac9f45ee0d1ba487d0f1915569 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 13 Nov 2023 14:41:38 +0000
Subject: [PATCH 157/222] Cheb2LegTH <: Plan (#234)

* Cheb2LegTH <: Plan

* avoid stack overflow in different sizes
---
 src/chebyshevtransform.jl | 4 ++++
 src/toeplitzhankel.jl     | 2 +-
 test/chebyshevtests.jl    | 5 +++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 01dd61af..541d9933 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -2,6 +2,8 @@
 
 abstract type ChebyshevPlan{T} <: Plan{T} end
 
+*(P::ChebyshevPlan{T}, x::AbstractArray{T}) where T = error("Plan applied to wrong size array")
+
 size(P::ChebyshevPlan) = isdefined(P, :plan) ? size(P.plan) : (0,)
 length(P::ChebyshevPlan) = isdefined(P, :plan) ? length(P.plan) : 0
 
@@ -151,6 +153,8 @@ end
     ldiv!(_prod_size(size(y), d), y)
 end
 
+
+
 function *(P::ChebyshevTransformPlan{T,1,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
     isempty(x) && return x
 
diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index fef934f6..383ff9ae 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -118,7 +118,7 @@ end
 
 
 
-struct ChebyshevToLegendrePlanTH{TH}
+struct ChebyshevToLegendrePlanTH{S,TH<:ToeplitzHankelPlan{S}} <: Plan{S}
     toeplitzhankel::TH
 end
 
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index d6f704bf..051d5f51 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -462,4 +462,9 @@ using FastTransforms, Test
             @test F \ (F*X) ≈ F * (F\X) ≈ X
         end
     end
+
+    @testset "incompatible shapes" begin
+        @test_throws ErrorException plan_chebyshevtransform(randn(5)) * randn(5,5)
+        @test_throws ErrorException plan_ichebyshevtransform(randn(5)) * randn(5,5)
+    end
 end

From e1db1ca2c4f3516ff3ac7beaa80cec197a84444d Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Tue, 14 Nov 2023 14:07:23 +0000
Subject: [PATCH 158/222] 4D Chebyshev transform (#235)

---
 Project.toml              |   2 +-
 src/chebyshevtransform.jl |  84 ++++-------------
 test/chebyshevtests.jl    | 183 ++++++++++++++++++++------------------
 3 files changed, 113 insertions(+), 156 deletions(-)

diff --git a/Project.toml b/Project.toml
index 7b29f896..3d45e5c3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.12"
+version = "0.15.13"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 541d9933..d55b52c3 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -58,76 +58,22 @@ plan_chebyshevtransform(x::AbstractArray, dims...; kws...) = plan_chebyshevtrans
 @inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::AbstractArray) where T = mul!(y, P, convert(Array{T}, x))
 
 
+for op in (:ldiv, :lmul)
+    op_dim_begin! = Symbol(string(op) * "_dim_begin!")
+    op_dim_end! = Symbol(string(op) * "_dim_end!")
+    op! = Symbol(string(op) * "!")
+    @eval begin
+        function $op_dim_begin!(α, d::Number, y::AbstractArray{<:Any,N}) where N
+            # scale just the d-th dimension by permuting it to the first
+            ỹ = PermutedDimsArray(y, _permfirst(d, N))
+            $op!(α, view(ỹ, 1, ntuple(_ -> :, Val(N-1))...))
+        end
 
-ldiv_dim_begin!(α, d::Number, y::AbstractVector) = y[1] /= α
-function ldiv_dim_begin!(α, d::Number, y::AbstractMatrix)
-    if isone(d)
-        ldiv!(α, @view(y[1,:]))
-    else
-        ldiv!(α, @view(y[:,1]))
-    end
-end
-function ldiv_dim_begin!(α, d::Number, y::AbstractArray{<:Any,3})
-    if isone(d)
-        ldiv!(α, @view(y[1,:,:]))
-    elseif d == 2
-        ldiv!(α, @view(y[:,1,:]))
-    else # d == 3
-        ldiv!(α, @view(y[:,:,1]))
-    end
-end
-
-ldiv_dim_end!(α, d::Number, y::AbstractVector) = y[end] /= α
-function ldiv_dim_end!(α, d::Number, y::AbstractMatrix)
-    if isone(d)
-        ldiv!(α, @view(y[end,:]))
-    else
-        ldiv!(α, @view(y[:,end]))
-    end
-end
-function ldiv_dim_end!(α, d::Number, y::AbstractArray{<:Any,3})
-    if isone(d)
-        ldiv!(α, @view(y[end,:,:]))
-    elseif d == 2
-        ldiv!(α, @view(y[:,end,:]))
-    else # d == 3
-        ldiv!(α, @view(y[:,:,end]))
-    end
-end
-
-lmul_dim_begin!(α, d::Number, y::AbstractVector) = y[1] *= α
-function lmul_dim_begin!(α, d::Number, y::AbstractMatrix)
-    if isone(d)
-        lmul!(α, @view(y[1,:]))
-    else
-        lmul!(α, @view(y[:,1]))
-    end
-end
-function lmul_dim_begin!(α, d::Number, y::AbstractArray{<:Any,3})
-    if isone(d)
-        lmul!(α, @view(y[1,:,:]))
-    elseif d == 2
-        lmul!(α, @view(y[:,1,:]))
-    else # d == 3
-        lmul!(α, @view(y[:,:,1]))
-    end
-end
-
-lmul_dim_end!(α, d::Number, y::AbstractVector) = y[end] *= α
-function lmul_dim_end!(α, d::Number, y::AbstractMatrix)
-    if isone(d)
-        lmul!(α, @view(y[end,:]))
-    else
-        lmul!(α, @view(y[:,end]))
-    end
-end
-function lmul_dim_end!(α, d::Number, y::AbstractArray{<:Any,3})
-    if isone(d)
-        lmul!(α, @view(y[end,:,:]))
-    elseif d == 2
-        lmul!(α, @view(y[:,end,:]))
-    else # d == 3
-        lmul!(α, @view(y[:,:,end]))
+        function $op_dim_end!(α, d::Number, y::AbstractArray{<:Any,N}) where N
+            # scale just the d-th dimension by permuting it to the first
+            ỹ = PermutedDimsArray(y, _permfirst(d, N))
+            $op!(α, view(ỹ, size(ỹ,1), ntuple(_ -> :, Val(N-1))...))
+        end
     end
 end
 
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 051d5f51..2a82978c 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -322,96 +322,107 @@ using FastTransforms, Test
     end
 
     @testset "tensor" begin
-        X = randn(4,5,6)
-        X̃ = similar(X)
-        @testset "chebyshevtransform" begin
-            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevtransform(X[:,k,j]) end
-            @test @inferred(chebyshevtransform(X,1)) ≈ @inferred(chebyshevtransform!(copy(X),1)) ≈ X̃
-            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevtransform(X[k,:,j]) end
-            @test chebyshevtransform(X,2) ≈ chebyshevtransform!(copy(X),2) ≈ X̃
-            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevtransform(X[k,j,:]) end
-            @test chebyshevtransform(X,3) ≈ chebyshevtransform!(copy(X),3) ≈ X̃
-
-            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevtransform(X[:,k,j],Val(2)) end
-            @test @inferred(chebyshevtransform(X,Val(2),1)) ≈ @inferred(chebyshevtransform!(copy(X),Val(2),1)) ≈ X̃
-            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevtransform(X[k,:,j],Val(2)) end
-            @test chebyshevtransform(X,Val(2),2) ≈ chebyshevtransform!(copy(X),Val(2),2) ≈ X̃
-            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevtransform(X[k,j,:],Val(2)) end
-            @test chebyshevtransform(X,Val(2),3) ≈ chebyshevtransform!(copy(X),Val(2),3) ≈ X̃
-
-            @test @inferred(chebyshevtransform(X)) ≈ @inferred(chebyshevtransform!(copy(X))) ≈ chebyshevtransform(chebyshevtransform(chebyshevtransform(X,1),2),3)
-            @test @inferred(chebyshevtransform(X,Val(2))) ≈ @inferred(chebyshevtransform!(copy(X),Val(2))) ≈ chebyshevtransform(chebyshevtransform(chebyshevtransform(X,Val(2),1),Val(2),2),Val(2),3)
-        end
-
-        @testset "ichebyshevtransform" begin
-            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevtransform(X[:,k,j]) end
-            @test @inferred(ichebyshevtransform(X,1)) ≈ @inferred(ichebyshevtransform!(copy(X),1)) ≈ X̃
-            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevtransform(X[k,:,j]) end
-            @test ichebyshevtransform(X,2) ≈ ichebyshevtransform!(copy(X),2) ≈ X̃
-            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevtransform(X[k,j,:]) end
-            @test ichebyshevtransform(X,3) ≈ ichebyshevtransform!(copy(X),3) ≈ X̃
-
-            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevtransform(X[:,k,j],Val(2)) end
-            @test @inferred(ichebyshevtransform(X,Val(2),1)) ≈ @inferred(ichebyshevtransform!(copy(X),Val(2),1)) ≈ X̃
-            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevtransform(X[k,:,j],Val(2)) end
-            @test ichebyshevtransform(X,Val(2),2) ≈ ichebyshevtransform!(copy(X),Val(2),2) ≈ X̃
-            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevtransform(X[k,j,:],Val(2)) end
-            @test ichebyshevtransform(X,Val(2),3) ≈ ichebyshevtransform!(copy(X),Val(2),3) ≈ X̃
-
-            @test @inferred(ichebyshevtransform(X)) ≈ @inferred(ichebyshevtransform!(copy(X))) ≈ ichebyshevtransform(ichebyshevtransform(ichebyshevtransform(X,1),2),3)
-            @test @inferred(ichebyshevtransform(X,Val(2))) ≈ @inferred(ichebyshevtransform!(copy(X),Val(2))) ≈ ichebyshevtransform(ichebyshevtransform(ichebyshevtransform(X,Val(2),1),Val(2),2),Val(2),3)
-
-            @test ichebyshevtransform(chebyshevtransform(X)) ≈ X
-            @test chebyshevtransform(ichebyshevtransform(X)) ≈ X
-        end
-    
-        @testset "chebyshevutransform" begin
-            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevutransform(X[:,k,j]) end
-            @test @inferred(chebyshevutransform(X,1)) ≈ @inferred(chebyshevutransform!(copy(X),1)) ≈ X̃
-            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevutransform(X[k,:,j]) end
-            @test chebyshevutransform(X,2) ≈ chebyshevutransform!(copy(X),2) ≈ X̃
-            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevutransform(X[k,j,:]) end
-            @test chebyshevutransform(X,3) ≈ chebyshevutransform!(copy(X),3) ≈ X̃
-
-            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevutransform(X[:,k,j],Val(2)) end
-            @test @inferred(chebyshevutransform(X,Val(2),1)) ≈ @inferred(chebyshevutransform!(copy(X),Val(2),1)) ≈ X̃
-            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevutransform(X[k,:,j],Val(2)) end
-            @test chebyshevutransform(X,Val(2),2) ≈ chebyshevutransform!(copy(X),Val(2),2) ≈ X̃
-            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevutransform(X[k,j,:],Val(2)) end
-            @test chebyshevutransform(X,Val(2),3) ≈ chebyshevutransform!(copy(X),Val(2),3) ≈ X̃
-
-            @test @inferred(chebyshevutransform(X)) ≈ @inferred(chebyshevutransform!(copy(X))) ≈ chebyshevutransform(chebyshevutransform(chebyshevutransform(X,1),2),3)
-            @test @inferred(chebyshevutransform(X,Val(2))) ≈ @inferred(chebyshevutransform!(copy(X),Val(2))) ≈ chebyshevutransform(chebyshevutransform(chebyshevutransform(X,Val(2),1),Val(2),2),Val(2),3)
+        @testset "3D" begin
+            X = randn(4,5,6)
+            X̃ = similar(X)
+            @testset "chebyshevtransform" begin
+                for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevtransform(X[:,k,j]) end
+                @test @inferred(chebyshevtransform(X,1)) ≈ @inferred(chebyshevtransform!(copy(X),1)) ≈ X̃
+                for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevtransform(X[k,:,j]) end
+                @test chebyshevtransform(X,2) ≈ chebyshevtransform!(copy(X),2) ≈ X̃
+                for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevtransform(X[k,j,:]) end
+                @test chebyshevtransform(X,3) ≈ chebyshevtransform!(copy(X),3) ≈ X̃
+
+                for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevtransform(X[:,k,j],Val(2)) end
+                @test @inferred(chebyshevtransform(X,Val(2),1)) ≈ @inferred(chebyshevtransform!(copy(X),Val(2),1)) ≈ X̃
+                for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevtransform(X[k,:,j],Val(2)) end
+                @test chebyshevtransform(X,Val(2),2) ≈ chebyshevtransform!(copy(X),Val(2),2) ≈ X̃
+                for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevtransform(X[k,j,:],Val(2)) end
+                @test chebyshevtransform(X,Val(2),3) ≈ chebyshevtransform!(copy(X),Val(2),3) ≈ X̃
+
+                @test @inferred(chebyshevtransform(X)) ≈ @inferred(chebyshevtransform!(copy(X))) ≈ chebyshevtransform(chebyshevtransform(chebyshevtransform(X,1),2),3)
+                @test @inferred(chebyshevtransform(X,Val(2))) ≈ @inferred(chebyshevtransform!(copy(X),Val(2))) ≈ chebyshevtransform(chebyshevtransform(chebyshevtransform(X,Val(2),1),Val(2),2),Val(2),3)
+            end
+
+            @testset "ichebyshevtransform" begin
+                for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevtransform(X[:,k,j]) end
+                @test @inferred(ichebyshevtransform(X,1)) ≈ @inferred(ichebyshevtransform!(copy(X),1)) ≈ X̃
+                for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevtransform(X[k,:,j]) end
+                @test ichebyshevtransform(X,2) ≈ ichebyshevtransform!(copy(X),2) ≈ X̃
+                for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevtransform(X[k,j,:]) end
+                @test ichebyshevtransform(X,3) ≈ ichebyshevtransform!(copy(X),3) ≈ X̃
+
+                for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevtransform(X[:,k,j],Val(2)) end
+                @test @inferred(ichebyshevtransform(X,Val(2),1)) ≈ @inferred(ichebyshevtransform!(copy(X),Val(2),1)) ≈ X̃
+                for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevtransform(X[k,:,j],Val(2)) end
+                @test ichebyshevtransform(X,Val(2),2) ≈ ichebyshevtransform!(copy(X),Val(2),2) ≈ X̃
+                for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevtransform(X[k,j,:],Val(2)) end
+                @test ichebyshevtransform(X,Val(2),3) ≈ ichebyshevtransform!(copy(X),Val(2),3) ≈ X̃
+
+                @test @inferred(ichebyshevtransform(X)) ≈ @inferred(ichebyshevtransform!(copy(X))) ≈ ichebyshevtransform(ichebyshevtransform(ichebyshevtransform(X,1),2),3)
+                @test @inferred(ichebyshevtransform(X,Val(2))) ≈ @inferred(ichebyshevtransform!(copy(X),Val(2))) ≈ ichebyshevtransform(ichebyshevtransform(ichebyshevtransform(X,Val(2),1),Val(2),2),Val(2),3)
+
+                @test ichebyshevtransform(chebyshevtransform(X)) ≈ X
+                @test chebyshevtransform(ichebyshevtransform(X)) ≈ X
+            end
+        
+            @testset "chebyshevutransform" begin
+                for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevutransform(X[:,k,j]) end
+                @test @inferred(chebyshevutransform(X,1)) ≈ @inferred(chebyshevutransform!(copy(X),1)) ≈ X̃
+                for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevutransform(X[k,:,j]) end
+                @test chebyshevutransform(X,2) ≈ chebyshevutransform!(copy(X),2) ≈ X̃
+                for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevutransform(X[k,j,:]) end
+                @test chebyshevutransform(X,3) ≈ chebyshevutransform!(copy(X),3) ≈ X̃
+
+                for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = chebyshevutransform(X[:,k,j],Val(2)) end
+                @test @inferred(chebyshevutransform(X,Val(2),1)) ≈ @inferred(chebyshevutransform!(copy(X),Val(2),1)) ≈ X̃
+                for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = chebyshevutransform(X[k,:,j],Val(2)) end
+                @test chebyshevutransform(X,Val(2),2) ≈ chebyshevutransform!(copy(X),Val(2),2) ≈ X̃
+                for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = chebyshevutransform(X[k,j,:],Val(2)) end
+                @test chebyshevutransform(X,Val(2),3) ≈ chebyshevutransform!(copy(X),Val(2),3) ≈ X̃
+
+                @test @inferred(chebyshevutransform(X)) ≈ @inferred(chebyshevutransform!(copy(X))) ≈ chebyshevutransform(chebyshevutransform(chebyshevutransform(X,1),2),3)
+                @test @inferred(chebyshevutransform(X,Val(2))) ≈ @inferred(chebyshevutransform!(copy(X),Val(2))) ≈ chebyshevutransform(chebyshevutransform(chebyshevutransform(X,Val(2),1),Val(2),2),Val(2),3)
+            end
+
+            @testset "ichebyshevutransform" begin
+                for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevutransform(X[:,k,j]) end
+                @test @inferred(ichebyshevutransform(X,1)) ≈ @inferred(ichebyshevutransform!(copy(X),1)) ≈ X̃
+                for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevutransform(X[k,:,j]) end
+                @test ichebyshevutransform(X,2) ≈ ichebyshevutransform!(copy(X),2) ≈ X̃
+                for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevutransform(X[k,j,:]) end
+                @test ichebyshevutransform(X,3) ≈ ichebyshevutransform!(copy(X),3) ≈ X̃
+
+                for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevutransform(X[:,k,j],Val(2)) end
+                @test @inferred(ichebyshevutransform(X,Val(2),1)) ≈ @inferred(ichebyshevutransform!(copy(X),Val(2),1)) ≈ X̃
+                for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevutransform(X[k,:,j],Val(2)) end
+                @test ichebyshevutransform(X,Val(2),2) ≈ ichebyshevutransform!(copy(X),Val(2),2) ≈ X̃
+                for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevutransform(X[k,j,:],Val(2)) end
+                @test ichebyshevutransform(X,Val(2),3) ≈ ichebyshevutransform!(copy(X),Val(2),3) ≈ X̃
+
+                @test @inferred(ichebyshevutransform(X)) ≈ @inferred(ichebyshevutransform!(copy(X))) ≈ ichebyshevutransform(ichebyshevutransform(ichebyshevutransform(X,1),2),3)
+                @test @inferred(ichebyshevutransform(X,Val(2))) ≈ @inferred(ichebyshevutransform!(copy(X),Val(2))) ≈ ichebyshevutransform(ichebyshevutransform(ichebyshevutransform(X,Val(2),1),Val(2),2),Val(2),3)
+
+                @test ichebyshevutransform(chebyshevutransform(X)) ≈ X
+                @test chebyshevutransform(ichebyshevutransform(X)) ≈ X
+            end
+
+            X = randn(1,1,1)
+            @test chebyshevtransform!(copy(X), Val(1)) == ichebyshevtransform!(copy(X), Val(1)) == X
+            @test_throws ArgumentError chebyshevtransform!(copy(X), Val(2))
+            @test_throws ArgumentError ichebyshevtransform!(copy(X), Val(2))
         end
 
-        @testset "ichebyshevutransform" begin
-            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevutransform(X[:,k,j]) end
-            @test @inferred(ichebyshevutransform(X,1)) ≈ @inferred(ichebyshevutransform!(copy(X),1)) ≈ X̃
-            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevutransform(X[k,:,j]) end
-            @test ichebyshevutransform(X,2) ≈ ichebyshevutransform!(copy(X),2) ≈ X̃
-            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevutransform(X[k,j,:]) end
-            @test ichebyshevutransform(X,3) ≈ ichebyshevutransform!(copy(X),3) ≈ X̃
-
-            for k = axes(X,2), j = axes(X,3) X̃[:,k,j] = ichebyshevutransform(X[:,k,j],Val(2)) end
-            @test @inferred(ichebyshevutransform(X,Val(2),1)) ≈ @inferred(ichebyshevutransform!(copy(X),Val(2),1)) ≈ X̃
-            for k = axes(X,1), j = axes(X,3) X̃[k,:,j] = ichebyshevutransform(X[k,:,j],Val(2)) end
-            @test ichebyshevutransform(X,Val(2),2) ≈ ichebyshevutransform!(copy(X),Val(2),2) ≈ X̃
-            for k = axes(X,1), j = axes(X,2) X̃[k,j,:] = ichebyshevutransform(X[k,j,:],Val(2)) end
-            @test ichebyshevutransform(X,Val(2),3) ≈ ichebyshevutransform!(copy(X),Val(2),3) ≈ X̃
-
-            @test @inferred(ichebyshevutransform(X)) ≈ @inferred(ichebyshevutransform!(copy(X))) ≈ ichebyshevutransform(ichebyshevutransform(ichebyshevutransform(X,1),2),3)
-            @test @inferred(ichebyshevutransform(X,Val(2))) ≈ @inferred(ichebyshevutransform!(copy(X),Val(2))) ≈ ichebyshevutransform(ichebyshevutransform(ichebyshevutransform(X,Val(2),1),Val(2),2),Val(2),3)
-
-            @test ichebyshevutransform(chebyshevutransform(X)) ≈ X
-            @test chebyshevutransform(ichebyshevutransform(X)) ≈ X
+        @testset "4D" begin
+            X = randn(2,3,4,5)
+            X̃ = similar(X)
+            for trans in (chebyshevtransform, ichebyshevtransform, chebyshevutransform, ichebyshevutransform)
+                for k = axes(X,2), j = axes(X,3), l = axes(X,4) X̃[:,k,j,l] = trans(X[:,k,j,l]) end
+                @test @inferred(trans(X,1)) ≈ X̃
+                @test @inferred(trans(X)) ≈ trans(trans(trans(trans(X,1),2),3),4)
+            end
         end
-
-        X = randn(1,1,1)
-        @test chebyshevtransform!(copy(X), Val(1)) == ichebyshevtransform!(copy(X), Val(1)) == X
-        @test_throws ArgumentError chebyshevtransform!(copy(X), Val(2))
-        @test_throws ArgumentError ichebyshevtransform!(copy(X), Val(2))
     end
-
     @testset "Integer" begin
         @test chebyshevtransform([1,2,3]) == chebyshevtransform([1.,2,3])
         @test chebyshevtransform([1,2,3], Val(2)) == chebyshevtransform([1.,2,3], Val(2))

From a1024f208f64ee8a82b3c19ac4e5699c4a8e41d9 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Thu, 16 Nov 2023 10:47:32 +0000
Subject: [PATCH 159/222] Support planning Chebyshev via specifying type and
 size (#236)

* Support planning Chebyshev via specifying type and size

* Update Project.toml
---
 Project.toml              |  2 +-
 src/chebyshevtransform.jl | 23 +++++++++++------------
 test/chebyshevtests.jl    |  6 ++++++
 3 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3d45e5c3..9a985c5e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.13"
+version = "0.15.14"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index d55b52c3..5303868e 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -49,9 +49,6 @@ function plan_chebyshevtransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws..
     ChebyshevTransformPlan{T,2}(FFTW.plan_r2r(x, SECONDKIND, dims...; kws...))
 end
 
-plan_chebyshevtransform!(x::AbstractArray, dims...; kws...) = plan_chebyshevtransform!(x, Val(1), dims...; kws...)
-plan_chebyshevtransform(x::AbstractArray, dims...; kws...) = plan_chebyshevtransform(x, Val(1), dims...; kws...)
-
 
 # convert x if necessary
 @inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::StridedArray{T}) where T = mul!(y, P, x)
@@ -221,9 +218,6 @@ function plan_ichebyshevtransform(x::AbstractArray{T}, ::Val{2}, dims...; kws...
     inv(plan_chebyshevtransform(x, Val(2), dims...; kws...))
 end
 
-plan_ichebyshevtransform!(x::AbstractArray, dims...; kws...) = plan_ichebyshevtransform!(x, Val(1), dims...; kws...)
-plan_ichebyshevtransform(x::AbstractArray, dims...; kws...) = plan_ichebyshevtransform(x, Val(1), dims...; kws...)
-
 @inline function _icheb1_prescale!(d::Number, x::AbstractArray)
     lmul_dim_begin!(2, d, x)
     x
@@ -369,9 +363,6 @@ function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws.
     ChebyshevUTransformPlan{T,2}(FFTW.plan_r2r(x, USECONDKIND, dims...; kws...))
 end
 
-plan_chebyshevutransform!(x::AbstractArray, dims...; kws...) = plan_chebyshevutransform!(x, Val(1), dims...; kws...)
-plan_chebyshevutransform(x::AbstractArray, dims...; kws...) = plan_chebyshevutransform(x, Val(1), dims...; kws...)
-
 
 _permfirst(d, N) = [d; 1:d-1; d+1:N]
 
@@ -526,9 +517,6 @@ function plan_ichebyshevutransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws
 end
 
 
-plan_ichebyshevutransform!(x::AbstractArray, dims...; kws...) = plan_ichebyshevutransform!(x, Val(1), dims...; kws...)
-plan_ichebyshevutransform(x::AbstractArray, dims...; kws...) = plan_ichebyshevutransform(x, Val(1), dims...; kws...)
-
 # second kind Chebyshev transforms share a plan with their inverse
 # so we support this via inv
 inv(P::ChebyshevUTransformPlan{T,2}) where {T} = IChebyshevUTransformPlan{T,2}(P.plan)
@@ -744,3 +732,14 @@ end
     copyto!(x, IChebyshevTransformPlan{T,1,Nothing,false,N,R}() * x)
 # *(P::IChebyshevTransformPlan{T,SECONDKIND,false,Nothing}, x::AbstractVector{T}) where T =
 #     IChebyshevTransformPlan{T,SECONDKIND,true,Nothing}() * copy(x)
+
+
+for pln in (:plan_chebyshevtransform!, :plan_chebyshevtransform, 
+            :plan_chebyshevutransform!, :plan_chebyshevutransform, 
+            :plan_ichebyshevutransform, :plan_ichebyshevutransform!, 
+            :plan_ichebyshevtransform, :plan_ichebyshevtransform!)
+    @eval begin
+        $pln(x::AbstractArray, dims...; kws...) = $pln(x, Val(1), dims...; kws...)
+        $pln(::Type{T}, szs, dims...; kwds...) where T = $pln(Array{T}(undef, szs...), dims...; kwds...)
+    end
+end
\ No newline at end of file
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 2a82978c..3112472d 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -478,4 +478,10 @@ using FastTransforms, Test
         @test_throws ErrorException plan_chebyshevtransform(randn(5)) * randn(5,5)
         @test_throws ErrorException plan_ichebyshevtransform(randn(5)) * randn(5,5)
     end
+
+    @testset "plan via size" begin
+        X = randn(3,4)
+        p = plan_chebyshevtransform(Float64, (3,4))
+        @test p * X == chebyshevtransform(X)
+    end
 end

From e3d47a2d429b2c4425a520fb4bb4e3da4e3f10d4 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Thu, 16 Nov 2023 14:40:41 +0000
Subject: [PATCH 160/222] Fix bug in ultra2ultra with integer difference (#237)

---
 Project.toml                | 2 +-
 src/toeplitzhankel.jl       | 2 +-
 test/toeplitzhankeltests.jl | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 9a985c5e..022e0477 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.14"
+version = "0.15.15"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index 383ff9ae..f7825466 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -299,7 +299,7 @@ function plan_th_ultra2ultra!(::Type{S}, mn, λ₁, λ₂, dims) where {S}
 
     if isapproxinteger(λ₂ - λ₁)
         # TODO: don't make extra plan
-        plans = typeof(_good_plan_th_ultra2ultra!(S, mn, λ₁+0.1, λ₂, dims))[]
+        plans = typeof(_good_plan_th_ultra2ultra!(S, mn, λ₂+0.1, λ₂, dims))[]
     else
         plans = [_good_plan_th_ultra2ultra!(S, mn, λ₁, c, dims)]
     end
diff --git a/test/toeplitzhankeltests.jl b/test/toeplitzhankeltests.jl
index b90ce94b..a8ebdc40 100644
--- a/test/toeplitzhankeltests.jl
+++ b/test/toeplitzhankeltests.jl
@@ -15,6 +15,7 @@ Random.seed!(0)
         @test th_ultra2ultra(x,1, 2) ≈ lib_ultra2ultra(x, 1, 2)
         @test th_ultra2ultra(x,0.1, 2.2) ≈ lib_ultra2ultra(x, 0.1, 2.2)
         @test th_ultra2ultra(x, 2.2, 0.1) ≈ lib_ultra2ultra(x, 2.2, 0.1)
+        @test th_ultra2ultra(x, 1, 3) ≈ lib_ultra2ultra(x, 1, 3)
         @test @inferred(th_jac2jac(x,0.1, 0.2,0.1,0.4)) ≈ lib_jac2jac(x, 0.1, 0.2,0.1,0.4)
         @test th_jac2jac(x,0.1, 0.2,0.3,0.2) ≈ lib_jac2jac(x, 0.1, 0.2,0.3,0.2)
         @test th_jac2jac(x,0.1, 0.2,0.3,0.4) ≈ lib_jac2jac(x, 0.1, 0.2,0.3,0.4)

From 9f135a895054c81fc105fd10058bbc8cfca1467d Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 4 Dec 2023 16:55:19 +0000
Subject: [PATCH 161/222] Support size and \ for cheb2leg (#239)

---
 Project.toml                |  2 +-
 src/toeplitzhankel.jl       |  8 ++++++++
 test/toeplitzhankeltests.jl | 19 +++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 022e0477..2f5b3613 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.15"
+version = "0.15.16"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index f7825466..78cc43fa 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -33,6 +33,8 @@ ToeplitzHankelPlan{S,N,M}(T::TP, L::LowR, R::LowR, dims::Dims) where {S,N,M,LowR
 ToeplitzHankelPlan{S,N}(T, L, R, dims) where {S,N} = ToeplitzHankelPlan{S,N,N+1}(T, L, R, dims)
 ToeplitzHankelPlan(T::ToeplitzPlan{S,M}, L::Matrix, R::Matrix, dims=1) where {S,M} = ToeplitzHankelPlan{S,M-1,M}((T,), (L,), (R,), dims)
 
+size(TH::ToeplitzHankelPlan) = size(first(TH.T))
+
 
 _reshape_broadcast(d, R, ::Val{N}, M) where N = reshape(R,ntuple(k -> k == d ? size(R,1) : 1, Val(N))...,M)
 function _th_applymul!(d, v::AbstractArray{<:Any,N}, T, L, R, tmp) where N
@@ -52,6 +54,8 @@ function *(P::ToeplitzHankelPlan{<:Any,N}, v::AbstractArray{<:Any,N}) where N
     v
 end
 
+*(P::ToeplitzHankelPlan, v::AbstractArray) = error("plan applied to wrong-sized array")
+
 
 # partial cholesky for a Hankel matrix
 
@@ -158,6 +162,10 @@ function *(P::ChebyshevToLegendrePlanTH, V::AbstractArray{<:Any,N}) where N
     V
 end
 
+_add1tod(d::Integer, a, b...) = d == 1 ? (a+1, b...) : (a, _add1tod(d-1, b...)...)
+_add1tod(d, a, b...) = _add1tod(first(d), a, b...)
+size(P::ChebyshevToLegendrePlanTH) = Base.front(_add1tod(P.toeplitzhankel.dims, size(first(P.toeplitzhankel.T))...))
+inv(P::ChebyshevToLegendrePlanTH{T}) where T = plan_th_leg2cheb!(T, size(P), P.toeplitzhankel.dims)
 
 
 function _leg2chebTH_TLC(::Type{S}, mn, d) where S
diff --git a/test/toeplitzhankeltests.jl b/test/toeplitzhankeltests.jl
index a8ebdc40..0b8731bf 100644
--- a/test/toeplitzhankeltests.jl
+++ b/test/toeplitzhankeltests.jl
@@ -164,4 +164,23 @@ Random.seed!(0)
             @test M ≈ Y
         end
     end
+
+    @testset "inv" begin
+        x = randn(10)
+        pl = plan_th_cheb2leg!(x)
+        @test size(pl) == (10,)
+        @test pl\(pl*x) ≈ x
+
+        X = randn(10,3)
+        for pl in (plan_th_cheb2leg!(X), plan_th_cheb2leg!(X, 1), plan_th_cheb2leg!(X, 2))
+            @test size(pl) == (10,3)
+            @test pl\(pl*copy(X)) ≈ X
+        end
+
+        X = randn(10,3,5)
+        for pl in (plan_th_cheb2leg!(X), plan_th_cheb2leg!(X, 1), plan_th_cheb2leg!(X, 2),  plan_th_cheb2leg!(X, 3))
+            @test size(pl) == (10,3,5)
+            @test pl\(pl*copy(X)) ≈ X
+        end
+    end
 end
\ No newline at end of file

From adf63f496fdfc4c36d99e8a5b35e7b7d8ff6d368 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Tue, 30 Jan 2024 23:15:50 +0530
Subject: [PATCH 162/222] Remove uses of PermutedDimsArray

---
 src/chebyshevtransform.jl | 124 +++++++++++++++++---------------------
 1 file changed, 55 insertions(+), 69 deletions(-)

diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 5303868e..9d2f90c8 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -54,22 +54,31 @@ end
 @inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::StridedArray{T}) where T = mul!(y, P, x)
 @inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::AbstractArray) where T = mul!(y, P, convert(Array{T}, x))
 
+function applydim!(op!, X::AbstractArray, Rpre, Rpost, ind)
+    for Ipost in Rpost, Ipre in Rpre
+        v = view(X, Ipre, ind, Ipost)
+        op!(v)
+    end
+    X
+end
 
 for op in (:ldiv, :lmul)
-    op_dim_begin! = Symbol(string(op) * "_dim_begin!")
-    op_dim_end! = Symbol(string(op) * "_dim_end!")
-    op! = Symbol(string(op) * "!")
+    op_dim_begin! = Symbol(op, :_dim_begin!)
+    op_dim_end! = Symbol(op, :_dim_end!)
+    op! = Symbol(op, :!)
     @eval begin
-        function $op_dim_begin!(α, d::Number, y::AbstractArray{<:Any,N}) where N
+        function $op_dim_begin!(α, d::Number, y::AbstractArray)
             # scale just the d-th dimension by permuting it to the first
-            ỹ = PermutedDimsArray(y, _permfirst(d, N))
-            $op!(α, view(ỹ, 1, ntuple(_ -> :, Val(N-1))...))
+            Rpre = CartesianIndices(axes(y)[1:d-1])
+            Rpost = CartesianIndices(axes(y)[d+1:end])
+            applydim!(v -> $op!(α, v), y, Rpre, Rpost, 1)
         end
 
-        function $op_dim_end!(α, d::Number, y::AbstractArray{<:Any,N}) where N
+        function $op_dim_end!(α, d::Number, y::AbstractArray)
             # scale just the d-th dimension by permuting it to the first
-            ỹ = PermutedDimsArray(y, _permfirst(d, N))
-            $op!(α, view(ỹ, size(ỹ,1), ntuple(_ -> :, Val(N-1))...))
+            Rpre = CartesianIndices(axes(y)[1:d-1])
+            Rpost = CartesianIndices(axes(y)[d+1:end])
+            applydim!(v -> $op!(α, v), y, Rpre, Rpost, size(y, d))
         end
     end
 end
@@ -366,32 +375,35 @@ end
 
 _permfirst(d, N) = [d; 1:d-1; d+1:N]
 
-@inline function _chebu1_prescale!(d::Number, X::AbstractArray{T,N}) where {T,N}
-    X̃ = PermutedDimsArray(X, _permfirst(d, N))
-    m = size(X̃,1)
-    X̃ .= (sinpi.(one(T)/(2m) .+ ((1:m) .- one(T))/m) ./ m) .* X̃
-    X
-end
-
-@inline function _chebu1_prescale!(d, y::AbstractArray)
-    for k in d
-        _chebu1_prescale!(k, y)
+for f in [:_chebu1_prescale!, :_chebu1_postscale!, :_chebu2_prescale!, :_chebu2_postscale!,
+            :_ichebu1_postscale!]
+    _f = Symbol(:_, f)
+    @eval begin
+        @inline function $f(d::Number, X::AbstractArray)
+            Rpre = CartesianIndices(axes(X)[1:d-1])
+            Rpost = CartesianIndices(axes(X)[d+1:end])
+            $_f(d, X, Rpre, Rpost)
+            X
+        end
+        @inline function $f(d, y::AbstractArray)
+            for k in d
+                $f(k, y)
+            end
+            y
+        end
     end
-    y
 end
 
-@inline function _chebu1_postscale!(d::Number, X::AbstractArray{T,N}) where {T,N}
-    X̃ = PermutedDimsArray(X, _permfirst(d, N))
-    m = size(X̃,1)
-    X̃ .= X̃ ./ (sinpi.(one(T)/(2m) .+ ((1:m) .- one(T))/m) ./ m)
-    X
+function __chebu1_prescale!(d::Number, X::AbstractArray{T}, Rpre, Rpost) where {T}
+    m = size(X,d)
+    r = one(T)/(2m) .+ ((1:m) .- one(T))./m
+    applydim!(v -> v .*= sinpi.(r) ./ m, X, Rpre, Rpost, :)
 end
 
-@inline function _chebu1_postscale!(d, y::AbstractArray)
-    for k in d
-        _chebu1_postscale!(k, y)
-    end
-    y
+@inline function __chebu1_postscale!(d::Number, X::AbstractArray{T}, Rpre, Rpost) where {T}
+    m = size(X,d)
+    r = one(T)/(2m) .+ ((1:m) .- one(T))./m
+    applydim!(v -> v ./= sinpi.(r) ./ m, X, Rpre, Rpost, :)
 end
 
 function *(P::ChebyshevUTransformPlan{T,1,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
@@ -413,35 +425,18 @@ function mul!(y::AbstractArray{T}, P::ChebyshevUTransformPlan{T,1,K,false}, x::A
 end
 
 
-@inline function _chebu2_prescale!(d::Number, X::AbstractArray{T,N}) where {T,N}
-    X̃ = PermutedDimsArray(X, _permfirst(d, N))
-    m = size(X̃,1)
+@inline function __chebu2_prescale!(d, X::AbstractArray{T}, Rpre, Rpost) where {T}
+    m = size(X,d)
     c = one(T)/ (m+1)
-    X̃ .= sinpi.((1:m) .* c) .* X̃
-    X
-end
-
-@inline function _chebu2_prescale!(d, y::AbstractArray)
-    for k in d
-        _chebu2_prescale!(k, y)
-    end
-    y
+    r = (1:m) .* c
+    applydim!(v -> v .*= sinpi.(r), X, Rpre, Rpost, :)
 end
 
-
-@inline function _chebu2_postscale!(d::Number, X::AbstractArray{T,N}) where {T,N}
-    X̃ = PermutedDimsArray(X, _permfirst(d, N))
-    m = size(X̃,1)
+@inline function __chebu2_postscale!(d::Number, X::AbstractArray{T}, Rpre, Rpost) where {T}
+    m = size(X,d)
     c = one(T)/ (m+1)
-    X̃ .= X̃ ./ sinpi.((1:m) .* c)
-    X
-end
-
-@inline function _chebu2_postscale!(d, y::AbstractArray)
-    for k in d
-        _chebu2_postscale!(k, y)
-    end
-    y
+    r = (1:m) .* c
+    applydim!(v -> v ./= sinpi.(r), X, Rpre, Rpost, :)
 end
 
 function *(P::ChebyshevUTransformPlan{T,2,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
@@ -525,19 +520,10 @@ inv(P::IChebyshevUTransformPlan{T,2}) where {T} = ChebyshevUTransformPlan{T,2}(P
 inv(P::ChebyshevUTransformPlan{T,1}) where {T} = IChebyshevUTransformPlan{T,1}(inv(P.plan).p)
 inv(P::IChebyshevUTransformPlan{T,1}) where {T} = ChebyshevUTransformPlan{T,1}(inv(P.plan).p)
 
-@inline function _ichebu1_postscale!(d::Number, X::AbstractArray{T,N}) where {T,N}
-    X̃ = PermutedDimsArray(X, _permfirst(d, N))
-    m = size(X̃,1)
-    X̃ .= X̃ ./ (2 .* sinpi.(one(T)/(2m) .+ ((1:m) .- one(T))/m))
-    X
-end
-
-
-@inline function _ichebu1_postscale!(d, y::AbstractArray)
-    for k in d
-        _ichebu1_postscale!(k, y)
-    end
-    y
+@inline function __ichebu1_postscale!(d::Number, X::AbstractArray{T}, Rpre, Rpost) where {T}
+    m = size(X,d)
+    r = one(T)/(2m) .+ ((1:m) .- one(T))/m
+    applydim!(v -> v ./= 2 .* sinpi.(r), X, Rpre, Rpost, :)
 end
 
 function *(P::IChebyshevUTransformPlan{T,1,K,true}, x::AbstractArray{T}) where {T<:fftwNumber,K}
@@ -742,4 +728,4 @@ for pln in (:plan_chebyshevtransform!, :plan_chebyshevtransform,
         $pln(x::AbstractArray, dims...; kws...) = $pln(x, Val(1), dims...; kws...)
         $pln(::Type{T}, szs, dims...; kwds...) where T = $pln(Array{T}(undef, szs...), dims...; kwds...)
     end
-end
\ No newline at end of file
+end

From 41cdc3cdc3c674b2454d5ca9b77a7d083068c4df Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Tue, 30 Jan 2024 23:48:36 +0530
Subject: [PATCH 163/222] Return tuple in _permfirst

---
 src/chebyshevtransform.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 9d2f90c8..8dd8ba1e 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -373,13 +373,14 @@ function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws.
 end
 
 
-_permfirst(d, N) = [d; 1:d-1; d+1:N]
+@inline _permfirst(d, N) = ntuple(i -> i == 1 ? d : i <= d ? i-1 : i, N)
 
 for f in [:_chebu1_prescale!, :_chebu1_postscale!, :_chebu2_prescale!, :_chebu2_postscale!,
             :_ichebu1_postscale!]
     _f = Symbol(:_, f)
     @eval begin
         @inline function $f(d::Number, X::AbstractArray)
+            d ∈ 1:ndims(X) || throw("dimension $d must lie between 1 and $(ndims(X))")
             Rpre = CartesianIndices(axes(X)[1:d-1])
             Rpost = CartesianIndices(axes(X)[d+1:end])
             $_f(d, X, Rpre, Rpost)

From 7dd11abdac0a8029c7fff37e80680edb673e01fb Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Tue, 30 Jan 2024 23:54:56 +0530
Subject: [PATCH 164/222] check dim in lmul_dim_begin and friends

---
 src/chebyshevtransform.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 8dd8ba1e..55c65e62 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -69,6 +69,7 @@ for op in (:ldiv, :lmul)
     @eval begin
         function $op_dim_begin!(α, d::Number, y::AbstractArray)
             # scale just the d-th dimension by permuting it to the first
+            d ∈ 1:ndims(y) || throw(ArgumentError("dimension $d must lie between 1 and $(ndims(y))"))
             Rpre = CartesianIndices(axes(y)[1:d-1])
             Rpost = CartesianIndices(axes(y)[d+1:end])
             applydim!(v -> $op!(α, v), y, Rpre, Rpost, 1)
@@ -76,6 +77,7 @@ for op in (:ldiv, :lmul)
 
         function $op_dim_end!(α, d::Number, y::AbstractArray)
             # scale just the d-th dimension by permuting it to the first
+            d ∈ 1:ndims(y) || throw(ArgumentError("dimension $d must lie between 1 and $(ndims(y))"))
             Rpre = CartesianIndices(axes(y)[1:d-1])
             Rpost = CartesianIndices(axes(y)[d+1:end])
             applydim!(v -> $op!(α, v), y, Rpre, Rpost, size(y, d))

From 809aee71ac2cd50f87b862a37c57ea81cdf6a050 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Wed, 31 Jan 2024 00:05:03 +0530
Subject: [PATCH 165/222] Update ChebyshevToLegendrePlanTH multiplication

---
 src/toeplitzhankel.jl | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/toeplitzhankel.jl b/src/toeplitzhankel.jl
index 78cc43fa..1c552a09 100644
--- a/src/toeplitzhankel.jl
+++ b/src/toeplitzhankel.jl
@@ -137,14 +137,14 @@ function *(P::ChebyshevToLegendrePlanTH, v::AbstractVector{S}) where S
     v
 end
 
-function _cheb2leg_rescale1!(V::AbstractArray{S}) where S
-    m = size(V,1)
-    for j = CartesianIndices(tail(axes(V)))
+function _cheb2leg_rescale1!(V::AbstractArray{S}, Rpre, Rpost, d) where S
+    m = size(V,d)
+    for Ipost in Rpost, Ipre in Rpre
         ret = zero(S)
         @inbounds for k = 1:2:m
-            ret += -V[k,j]/(k*(k-2))
+            ret += -V[Ipre,k,Ipost]/(k*(k-2))
         end
-        V[1,j] = ret
+        V[Ipre,1,Ipost] = ret
     end
     V
 end
@@ -152,11 +152,13 @@ end
 _dropfirstdim(d::Int) = ()
 _dropfirstdim(d::Int, m, szs...) = ((d == 1 ? 2 : 1):m, _dropfirstdim(d-1, szs...)...)
 
-function *(P::ChebyshevToLegendrePlanTH, V::AbstractArray{<:Any,N}) where N
+function *(P::ChebyshevToLegendrePlanTH, V::AbstractArray)
     m,n = size(V)
     tmp = P.toeplitzhankel.tmp
     for (d,R,L,T) in zip(P.toeplitzhankel.dims,P.toeplitzhankel.R,P.toeplitzhankel.L,P.toeplitzhankel.T)
-        _cheb2leg_rescale1!(PermutedDimsArray(V, _permfirst(d, N)))
+        Rpre = CartesianIndices(axes(V)[1:d-1])
+        Rpost = CartesianIndices(axes(V)[d+1:end])
+        _cheb2leg_rescale1!(V, Rpre, Rpost, d)
         _th_applymul!(d, view(V, _dropfirstdim(d, size(V)...)...), T, L, R, tmp)
     end
     V
@@ -729,4 +731,4 @@ th_cheb2jac(v, α, β, dims...) = plan_th_cheb2jac!(eltype(v), size(v), α, β,
 
 plan_th_jac2cheb!(::Type{T}, mn, α, β, dims...) where T = Jac2ChebPlanTH(plan_th_jac2jac!(T, mn, α, β, -one(α)/2, -one(α)/2, dims...))
 plan_th_jac2cheb!(arr::AbstractArray{T}, α, β, dims...) where T = plan_th_jac2cheb!(T, size(arr), α, β, dims...)
-th_jac2cheb(v, α, β, dims...) = plan_th_jac2cheb!(eltype(v), size(v), α, β, dims...)*copy(v)
\ No newline at end of file
+th_jac2cheb(v, α, β, dims...) = plan_th_jac2cheb!(eltype(v), size(v), α, β, dims...)*copy(v)

From a929128ba2a4d061f4a74b2f582877ae6d25cc13 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Wed, 31 Jan 2024 10:54:59 +0530
Subject: [PATCH 166/222] applydim in ToeplitzPlan

---
 src/chebyshevtransform.jl | 37 ++++++++++++++++++-------------------
 src/toeplitzplans.jl      |  5 ++---
 2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 55c65e62..a8a0216f 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -61,6 +61,11 @@ function applydim!(op!, X::AbstractArray, Rpre, Rpost, ind)
     end
     X
 end
+function applydim!(op!, X::AbstractArray, d::Integer, ind)
+    Rpre = CartesianIndices(axes(X)[1:d-1])
+    Rpost = CartesianIndices(axes(X)[d+1:end])
+    applydim!(op!, X, Rpre, Rpost, ind)
+end
 
 for op in (:ldiv, :lmul)
     op_dim_begin! = Symbol(op, :_dim_begin!)
@@ -70,17 +75,13 @@ for op in (:ldiv, :lmul)
         function $op_dim_begin!(α, d::Number, y::AbstractArray)
             # scale just the d-th dimension by permuting it to the first
             d ∈ 1:ndims(y) || throw(ArgumentError("dimension $d must lie between 1 and $(ndims(y))"))
-            Rpre = CartesianIndices(axes(y)[1:d-1])
-            Rpost = CartesianIndices(axes(y)[d+1:end])
-            applydim!(v -> $op!(α, v), y, Rpre, Rpost, 1)
+            applydim!(v -> $op!(α, v), y, d, 1)
         end
 
         function $op_dim_end!(α, d::Number, y::AbstractArray)
             # scale just the d-th dimension by permuting it to the first
             d ∈ 1:ndims(y) || throw(ArgumentError("dimension $d must lie between 1 and $(ndims(y))"))
-            Rpre = CartesianIndices(axes(y)[1:d-1])
-            Rpost = CartesianIndices(axes(y)[d+1:end])
-            applydim!(v -> $op!(α, v), y, Rpre, Rpost, size(y, d))
+            applydim!(v -> $op!(α, v), y, d, size(y, d))
         end
     end
 end
@@ -383,9 +384,7 @@ for f in [:_chebu1_prescale!, :_chebu1_postscale!, :_chebu2_prescale!, :_chebu2_
     @eval begin
         @inline function $f(d::Number, X::AbstractArray)
             d ∈ 1:ndims(X) || throw("dimension $d must lie between 1 and $(ndims(X))")
-            Rpre = CartesianIndices(axes(X)[1:d-1])
-            Rpost = CartesianIndices(axes(X)[d+1:end])
-            $_f(d, X, Rpre, Rpost)
+            $_f(d, X)
             X
         end
         @inline function $f(d, y::AbstractArray)
@@ -397,16 +396,16 @@ for f in [:_chebu1_prescale!, :_chebu1_postscale!, :_chebu2_prescale!, :_chebu2_
     end
 end
 
-function __chebu1_prescale!(d::Number, X::AbstractArray{T}, Rpre, Rpost) where {T}
+function __chebu1_prescale!(d::Number, X::AbstractArray{T}) where {T}
     m = size(X,d)
     r = one(T)/(2m) .+ ((1:m) .- one(T))./m
-    applydim!(v -> v .*= sinpi.(r) ./ m, X, Rpre, Rpost, :)
+    applydim!(v -> v .*= sinpi.(r) ./ m, X, d, :)
 end
 
-@inline function __chebu1_postscale!(d::Number, X::AbstractArray{T}, Rpre, Rpost) where {T}
+@inline function __chebu1_postscale!(d::Number, X::AbstractArray{T}) where {T}
     m = size(X,d)
     r = one(T)/(2m) .+ ((1:m) .- one(T))./m
-    applydim!(v -> v ./= sinpi.(r) ./ m, X, Rpre, Rpost, :)
+    applydim!(v -> v ./= sinpi.(r) ./ m, X, d, :)
 end
 
 function *(P::ChebyshevUTransformPlan{T,1,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
@@ -428,18 +427,18 @@ function mul!(y::AbstractArray{T}, P::ChebyshevUTransformPlan{T,1,K,false}, x::A
 end
 
 
-@inline function __chebu2_prescale!(d, X::AbstractArray{T}, Rpre, Rpost) where {T}
+@inline function __chebu2_prescale!(d, X::AbstractArray{T}) where {T}
     m = size(X,d)
     c = one(T)/ (m+1)
     r = (1:m) .* c
-    applydim!(v -> v .*= sinpi.(r), X, Rpre, Rpost, :)
+    applydim!(v -> v .*= sinpi.(r), X, d, :)
 end
 
-@inline function __chebu2_postscale!(d::Number, X::AbstractArray{T}, Rpre, Rpost) where {T}
+@inline function __chebu2_postscale!(d::Number, X::AbstractArray{T}) where {T}
     m = size(X,d)
     c = one(T)/ (m+1)
     r = (1:m) .* c
-    applydim!(v -> v ./= sinpi.(r), X, Rpre, Rpost, :)
+    applydim!(v -> v ./= sinpi.(r), X, d, :)
 end
 
 function *(P::ChebyshevUTransformPlan{T,2,K,true,N}, x::AbstractArray{T,N}) where {T,K,N}
@@ -523,10 +522,10 @@ inv(P::IChebyshevUTransformPlan{T,2}) where {T} = ChebyshevUTransformPlan{T,2}(P
 inv(P::ChebyshevUTransformPlan{T,1}) where {T} = IChebyshevUTransformPlan{T,1}(inv(P.plan).p)
 inv(P::IChebyshevUTransformPlan{T,1}) where {T} = ChebyshevUTransformPlan{T,1}(inv(P.plan).p)
 
-@inline function __ichebu1_postscale!(d::Number, X::AbstractArray{T}, Rpre, Rpost) where {T}
+@inline function __ichebu1_postscale!(d::Number, X::AbstractArray{T}) where {T}
     m = size(X,d)
     r = one(T)/(2m) .+ ((1:m) .- one(T))/m
-    applydim!(v -> v ./= 2 .* sinpi.(r), X, Rpre, Rpost, :)
+    applydim!(v -> v ./= 2 .* sinpi.(r), X, d, :)
 end
 
 function *(P::IChebyshevUTransformPlan{T,1,K,true}, x::AbstractArray{T}) where {T<:fftwNumber,K}
diff --git a/src/toeplitzplans.jl b/src/toeplitzplans.jl
index 42d24062..9be77234 100644
--- a/src/toeplitzplans.jl
+++ b/src/toeplitzplans.jl
@@ -61,12 +61,11 @@ function *(A::ToeplitzPlan{T,N}, X::AbstractArray{T,N}) where {T,N}
 
     # Fourier transform each dimension
     dft * Y
-    
+
     # Multiply by a diagonal matrix along each dimension by permuting
     # to first dimension
     for (vc,d) in zip(vcs,dims)
-        Ỹ = PermutedDimsArray(Y, _permfirst(d, N))
-        Ỹ .= vc .* Ỹ
+        applydim!(v -> v .= vc .* v, Y, d, :)
     end
 
     # Transform back

From da5f2149438dcce301b147f0e596d1c0f07bd553 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Wed, 31 Jan 2024 11:05:54 +0530
Subject: [PATCH 167/222] Remove _permfirst

---
 src/chebyshevtransform.jl | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index a8a0216f..d30177f3 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -375,9 +375,6 @@ function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{2}, dims...; kws.
     ChebyshevUTransformPlan{T,2}(FFTW.plan_r2r(x, USECONDKIND, dims...; kws...))
 end
 
-
-@inline _permfirst(d, N) = ntuple(i -> i == 1 ? d : i <= d ? i-1 : i, N)
-
 for f in [:_chebu1_prescale!, :_chebu1_postscale!, :_chebu2_prescale!, :_chebu2_postscale!,
             :_ichebu1_postscale!]
     _f = Symbol(:_, f)

From 7ca43ceb57c1c061f9c2fea635a04cd7ced7254b Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 15 Mar 2024 10:43:23 -0500
Subject: [PATCH 168/222] add Symmetric(Banded)ToeplitzPlusHankel

---
 Project.toml                             |   4 +-
 src/FastTransforms.jl                    |   8 +-
 src/SymmetricToeplitzPlusHankel.jl       | 135 +++++++++++++++++++++++
 test/runtests.jl                         |   3 +-
 test/symmetrictoeplitzplushankeltests.jl |  39 +++++++
 5 files changed, 185 insertions(+), 4 deletions(-)
 create mode 100644 src/SymmetricToeplitzPlusHankel.jl
 create mode 100644 test/symmetrictoeplitzplushankeltests.jl

diff --git a/Project.toml b/Project.toml
index 2f5b3613..8bba0be1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,9 +1,10 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.15.16"
+version = "0.16.0"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
+BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
 FastTransforms_jll = "34b6f7d7-08f9-5794-9e10-3819e4c7e49a"
@@ -17,6 +18,7 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "1.0"
+BandedMatrices = "1.5"
 FFTW = "1.7"
 FastGaussQuadrature = "0.4, 0.5, 1"
 FastTransforms_jll = "0.6.2"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index e9bf6fac..82c34990 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,6 +1,6 @@
 module FastTransforms
 
-using FastGaussQuadrature, FillArrays, LinearAlgebra,
+using BandedMatrices, FastGaussQuadrature, FillArrays, LinearAlgebra,
       Reexport, SpecialFunctions, ToeplitzMatrices
 
 @reexport using AbstractFFTs
@@ -19,6 +19,8 @@ import AbstractFFTs: Plan, ScaledPlan,
                      fftshift, ifftshift, rfft_output_size, brfft_output_size,
                      normalization
 
+import BandedMatrices: bandwidths
+
 import FFTW: dct, dct!, idct, idct!, plan_dct!, plan_idct!,
              plan_dct, plan_idct, fftwNumber
 
@@ -26,7 +28,7 @@ import FastGaussQuadrature: unweightedgausshermite
 
 import FillArrays: AbstractFill, getindex_value
 
-import LinearAlgebra: mul!, lmul!, ldiv!
+import LinearAlgebra: mul!, lmul!, ldiv!, cholesky
 
 import GenericFFT: interlace # imported in downstream packages
 
@@ -112,6 +114,8 @@ include("specialfunctions.jl")
 include("toeplitzplans.jl")
 include("toeplitzhankel.jl")
 
+include("SymmetricToeplitzPlusHankel.jl")
+
 # following use libfasttransforms by default
 for f in (:jac2jac,
     :lag2lag, :jac2ultra, :ultra2jac, :jac2cheb,
diff --git a/src/SymmetricToeplitzPlusHankel.jl b/src/SymmetricToeplitzPlusHankel.jl
new file mode 100644
index 00000000..a9bc4013
--- /dev/null
+++ b/src/SymmetricToeplitzPlusHankel.jl
@@ -0,0 +1,135 @@
+struct SymmetricToeplitzPlusHankel{T} <: AbstractMatrix{T}
+    v::Vector{T}
+    n::Int
+end
+
+function SymmetricToeplitzPlusHankel(v::Vector{T}) where T
+    n = (length(v)+1)÷2
+    SymmetricToeplitzPlusHankel{T}(v, n)
+end
+
+size(A::SymmetricToeplitzPlusHankel{T}) where T = (A.n, A.n)
+getindex(A::SymmetricToeplitzPlusHankel{T}, i::Integer, j::Integer) where T = A.v[abs(i-j)+1] + A.v[i+j-1]
+
+struct SymmetricBandedToeplitzPlusHankel{T} <: BandedMatrices.AbstractBandedMatrix{T}
+    v::Vector{T}
+    n::Int
+    b::Int
+end
+
+function SymmetricBandedToeplitzPlusHankel(v::Vector{T}, n::Integer) where T
+    SymmetricBandedToeplitzPlusHankel{T}(v, n, length(v)-1)
+end
+
+size(A::SymmetricBandedToeplitzPlusHankel{T}) where T = (A.n, A.n)
+function getindex(A::SymmetricBandedToeplitzPlusHankel{T}, i::Integer, j::Integer) where T
+    v = A.v
+    if abs(i-j) < length(v)
+        if i+j-1 ≤ length(v)
+            v[abs(i-j)+1] + v[i+j-1]
+        else
+            v[abs(i-j)+1]
+        end
+    else
+        zero(T)
+    end
+end
+bandwidths(A::SymmetricBandedToeplitzPlusHankel{T}) where T = (A.b, A.b)
+
+#
+# Jac*W-W*Jac' = G*J*G'
+# This returns G and J, where J = [0 I; -I 0], respecting the skew-symmetry of the right-hand side.
+#
+function compute_skew_generators(A::SymmetricToeplitzPlusHankel{T}) where T
+    v = A.v
+    n = size(A, 1)
+    J = [zero(T) one(T); -one(T) zero(T)]
+    G = zeros(T, n, 2)
+    G[n, 1] = one(T)
+    u2 = reverse(v[2:n+1])
+    u2[1:n-1] .+= v[n+1:2n-1]
+    G[:, 2] .= -u2
+    G, J
+end
+
+function cholesky(A::SymmetricToeplitzPlusHankel{T}) where T
+    n = size(A, 1)
+    G, J = compute_skew_generators(A)
+    L = zeros(T, n, n)
+    r = A[:, 1]
+    r2 = zeros(T, n)
+    l = zeros(T, n)
+    v = zeros(T, n)
+    col1 = zeros(T, n)
+    STPHcholesky!(L, G, r, r2, l, v, col1, n)
+    return Cholesky(L, 'L', 0)
+end
+
+function STPHcholesky!(L::Matrix{T}, G, r, r2, l, v, col1, n) where T
+    @inbounds for k in 1:n-1 # sequential sweep: step k reads the r, col1 and G written by step k-1, so @simd must not be asserted here
+        x = sqrt(r[1])
+        for j in 1:n-k+1
+            L[j+k-1, k] = l[j] = r[j]/x
+        end
+        for j in 1:n-k+1
+            v[j] = G[j, 1]*G[1,2]-G[j,2]*G[1,1]
+        end
+        F12 = k == 1 ? T(2) : T(1)
+        r2[1] = (r[2] - v[1])/F12
+        for j in 2:n-k
+            r2[j] = (r[j+1]+r[j-1] + r[1]*col1[j] - col1[1]*r[j] - v[j])/F12
+        end
+        r2[n-k+1] = (r[n-k] + r[1]*col1[n-k+1] - col1[1]*r[n-k+1] - v[n-k+1])/F12
+        cst = r[2]/x
+        for j in 1:n-k
+            r[j] = r2[j+1] - cst*l[j+1]
+        end
+        for j in 1:n-k
+            col1[j] = -F12/x*l[j+1]
+        end
+        c1 = G[1, 1]
+        c2 = G[1, 2]
+        for j in 1:n-k
+            G[j, 1] = G[j+1, 1] - l[j+1]*c1/x
+            G[j, 2] = G[j+1, 2] - l[j+1]*c2/x
+        end
+    end
+    L[n, n] = sqrt(r[1])
+end
+
+function cholesky(A::SymmetricBandedToeplitzPlusHankel{T}) where T
+    n = size(A, 1)
+    b = A.b
+    R = BandedMatrix{T}(undef, (n, n), (0, bandwidth(A, 2)))
+    r = A[1:b+2, 1]
+    r2 = zeros(T, b+3)
+    l = zeros(T, b+3)
+    col1 = zeros(T, b+2)
+    SBTPHcholesky!(R, r, r2, l, col1, n, b)
+    return Cholesky(R, 'U', 0)
+end
+
+function SBTPHcholesky!(R::BandedMatrix{T}, r, r2, l, col1, n, b) where T
+    @inbounds for k in 1:n # sequential sweep: step k reads the r and col1 written by step k-1, so @simd must not be asserted here
+        x = sqrt(r[1])
+        for j in 1:b+1
+            l[j] = r[j]/x
+        end
+        for j in 1:min(n-k+1, b+1)
+            R[k, j+k-1] = l[j]
+        end
+        F12 = k == 1 ? T(2) : T(1)
+        r2[1] = r[2]/F12
+        for j in 2:b+1
+            r2[j] = (r[j+1]+r[j-1] + r[1]*col1[j] - col1[1]*r[j])/F12
+        end
+        r2[b+2] = (r[b+1] + r[1]*col1[b+2] - col1[1]*r[b+2])/F12
+        cst = r[2]/x
+        for j in 1:b+2
+            r[j] = r2[j+1] - cst*l[j+1]
+        end
+        for j in 1:b+2
+            col1[j] = -F12/x*l[j+1]
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 1efb21e9..de16f36b 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,4 +10,5 @@ include("gaunttests.jl")
 include("hermitetests.jl")
 include("clenshawtests.jl")
 include("toeplitzplanstests.jl")
-include("toeplitzhankeltests.jl")
\ No newline at end of file
+include("toeplitzhankeltests.jl")
+include("symmetrictoeplitzplushankeltests.jl")
diff --git a/test/symmetrictoeplitzplushankeltests.jl b/test/symmetrictoeplitzplushankeltests.jl
new file mode 100644
index 00000000..c26c38d3
--- /dev/null
+++ b/test/symmetrictoeplitzplushankeltests.jl
@@ -0,0 +1,39 @@
+using BandedMatrices, FastTransforms, LinearAlgebra, ToeplitzMatrices, Test
+
+import FastTransforms: SymmetricToeplitzPlusHankel, SymmetricBandedToeplitzPlusHankel
+
+@testset "SymmetricToeplitzPlusHankel" begin
+    n = 128
+    for T in (Float32, Float64, BigFloat)
+        μ = -FastTransforms.chebyshevlogmoments1(T, 2n-1)
+        μ[1] += 1
+        W = SymmetricToeplitzPlusHankel(μ/2)
+        SMW = Symmetric(Matrix(W))
+        @test W ≈ SymmetricToeplitz(μ[1:(length(μ)+1)÷2]/2) + Hankel(μ/2)
+        L = cholesky(W).L
+        R = cholesky(SMW).U
+        @test L*L' ≈ W
+        @test L' ≈ R
+    end
+end
+
+@testset "SymmetricBandedToeplitzPlusHankel" begin
+    n = 1024
+    for T in (Float32, Float64)
+        μ = T[1.875; 0.00390625; 0.5; 0.0009765625; 0.0625]
+        W = SymmetricBandedToeplitzPlusHankel(μ/2, n)
+        SBW = Symmetric(BandedMatrix(W))
+        W1 = SymmetricToeplitzPlusHankel([μ/2; zeros(2n-1-length(μ))])
+        SMW = Symmetric(Matrix(W))
+        U = cholesky(SMW).U
+        L = cholesky(W1).L
+        UB = cholesky(SBW).U
+        R = cholesky(W).U
+        @test L*L' ≈ W
+        @test UB'UB ≈ W
+        @test R'R ≈ W
+        @test UB ≈ U
+        @test L' ≈ U
+        @test R ≈ U
+    end
+end

From 324c2230c673ad270291fb2731e714ce9317757a Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 15 Mar 2024 15:05:15 -0500
Subject: [PATCH 169/222] add absolute value moments

---
 Project.toml            |  2 +-
 src/specialfunctions.jl | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 8bba0be1..97d36acb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.16.0"
+version = "0.16.1"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/specialfunctions.jl b/src/specialfunctions.jl
index 66ef53c2..b5915ab7 100644
--- a/src/specialfunctions.jl
+++ b/src/specialfunctions.jl
@@ -307,6 +307,23 @@ function chebyshevlogmoments1(::Type{T}, N::Int) where T
     μ
 end
 
+"""
+Modified Chebyshev moments of the first kind with respect to the absolute value weight:
+
+```math
+    \\int_{-1}^{+1} T_n(x) |x|{\\rm\\,d}x.
+```
+"""
+function chebyshevabsmoments1(::Type{T}, N::Int) where T
+    μ = zeros(T, N)
+    if N > 0
+        for i=0:4:N-1
+            @inbounds μ[i+1] = -T(1)/T((i÷2)^2-1)
+        end
+    end
+    μ
+end
+
 """
 Modified Chebyshev moments of the second kind:
 
@@ -359,6 +376,23 @@ function chebyshevlogmoments2(::Type{T}, N::Int) where T
     μ
 end
 
+"""
+Modified Chebyshev moments of the second kind with respect to the absolute value weight:
+
+```math
+    \\int_{-1}^{+1} U_n(x) |x|{\\rm\\,d}x.
+```
+"""
+function chebyshevabsmoments2(::Type{T}, N::Int) where T
+    μ = chebyshevabsmoments1(T, N)
+    if N > 1
+        μ[2] *= two(T)
+        for i=1:N-2
+            @inbounds μ[i+2] = 2μ[i+2] + μ[i]
+        end
+    end
+    μ
+end
 
 function sphrand(::Type{T}, m::Int, n::Int) where T
     A = zeros(T, m, n)

From 2c763590b22f8c38e3a60c0e296776c66f49e411 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 21 Mar 2024 11:04:36 -0500
Subject: [PATCH 170/222] create halfrange example

---
 docs/Project.toml     |  1 +
 docs/make.jl          |  2 ++
 examples/halfrange.jl | 62 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+)
 create mode 100644 examples/halfrange.jl

diff --git a/docs/Project.toml b/docs/Project.toml
index cabe3c60..c96b3cd3 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FastTransforms = "057dd010-8810-581a-b7be-e3fc3b93f78c"
+LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
 Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
 PlotlyJS = "f0f68f2c-4968-5e81-91da-67840de0976a"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
diff --git a/docs/make.jl b/docs/make.jl
index 77842971..dd530e8f 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -10,6 +10,7 @@ examples = [
     "automaticdifferentiation.jl",
     "chebyshev.jl",
     "disk.jl",
+    "halfrange.jl",
     "nonlocaldiffusion.jl",
     "padua.jl",
     "sphere.jl",
@@ -43,6 +44,7 @@ makedocs(
                         "generated/automaticdifferentiation.md",
                         "generated/chebyshev.md",
                         "generated/disk.md",
+                        "generated/halfrange.md",
                         "generated/nonlocaldiffusion.md",
                         "generated/padua.md",
                         "generated/sphere.md",
diff --git a/examples/halfrange.jl b/examples/halfrange.jl
new file mode 100644
index 00000000..6d9f6a69
--- /dev/null
+++ b/examples/halfrange.jl
@@ -0,0 +1,62 @@
+# # Half-range Chebyshev polynomials
+# In [this paper](https://doi.org/10.1137/090752456), [Daan Huybrechs](https://github.com/daanhb) introduced the so-called half-range Chebyshev polynomials
+# as the non-classical orthogonal polynomials with respect to the inner product:
+# ```math
+# \langle f, g \rangle = \int_0^1 f(x) g(x)\frac{{\rm d} x}{\sqrt{1-x^2}}.
+# ```
+# By the variable transformation $y = 2x-1$, the resulting polynomials can be related to
+# orthogonal polynomials on $(-1,1)$ with the Jacobi weight $(1-y)^{-\frac{1}{2}}$ modified by the weight $(3+y)^{-\frac{1}{2}}$.
+#
+# We shall use the fact that:
+# ```math
+# \frac{1}{\sqrt{3+y}} = \sqrt{\frac{2}{3+\sqrt{8}}}\sum_{n=0}^\infty P_n(y) \left(\frac{-1}{3+\sqrt{8}}\right)^n,
+# ```
+# and results from [this paper](https://arxiv.org/abs/2302.08448) to consider the half-range Chebyshev polynomials as
+# modifications of the Jacobi polynomials $P_n^{(-\frac{1}{2},0)}(y)$.
+
+using FastTransforms, LinearAlgebra, Plots, LaTeXStrings
+const GENFIGS = joinpath(pkgdir(FastTransforms), "docs/src/generated")
+!isdir(GENFIGS) && mkdir(GENFIGS)
+plotlyjs()
+
+# We truncate the generating function to ensure an absolute error of `eps()`:
+z = -1/(3+sqrt(8))
+K = sqrt(-2z)
+N = log(abs(z), eps()*(1-abs(z))/K) - 1
+d = K .* z .^(0:N)
+
+# Then, we convert this representation to the expansion in Jacobi polynomials $P_n^{(-\frac{1}{2}, 0)}(y)$:
+u = jac2jac(d, 0.0, 0.0, -0.5, 0.0; norm1 = false, norm2 = true)
+
+# Our working polynomial degree will be:
+n = 100
+
+# We compute the connection coefficients between the modified orthogonal polynomials and the Jacobi polynomials:
+P = plan_modifiedjac2jac(Float64, n+1, -0.5, 0.0, u)
+
+# We store the connection to first kind Chebyshev polynomials:
+P1 = plan_jac2cheb(Float64, n+1, -0.5, 0.0; normjac = true)
+
+# We compute the Chebyshev series for the degree-$k\le n$ modified polynomial and its values at the Chebyshev points:
+q = k -> lmul!(P1, lmul!(P, [zeros(k); 1.0; zeros(n-k)]))
+qvals = k-> ichebyshevtransform(q(k))
+
+# With the symmetric Jacobi matrix for $P_n^{(-\frac{1}{2}, 0)}(y)$ and the modified plan, we may compute the modified Jacobi matrix and the corresponding roots (as eigenvalues):
+XP = SymTridiagonal([-inv((4n-1)*(4n-5)) for n in 1:n+1], [4n*(2n-1)/(4n-1)/sqrt((4n-3)*(4n+1)) for n in 1:n])
+XQ = FastTransforms.modified_jacobi_matrix(P, XP)
+
+# And we plot:
+x = (chebyshevpoints(Float64, n+1, Val(1)) .+ 1 ) ./ 2
+p = plot(x, qvals(0); linewidth=2.0, legend = false, xlim=(0,1), xlabel=L"x",
+         ylabel=L"T^h_n(x)", title="Half-Range Chebyshev Polynomials and Their Roots",
+         extra_plot_kwargs = KW(:include_mathjax => "cdn"))
+for k in 1:10
+    λ = (eigvals(SymTridiagonal(XQ.dv[1:k], XQ.ev[1:k-1])) .+ 1) ./ 2
+    plot!(x, qvals(k); linewidth=2.0, color=palette(:default)[k+1])
+    scatter!(λ, zero(λ); markersize=2.5, color=palette(:default)[k+1])
+end
+p
+#savefig(joinpath(GENFIGS, "halfrange.html"))
+###```@raw html
+###<object type="text/html" data="../halfrange.html" style="width:100%;height:400px;"></object>
+###```

From 1bfa33b8856c8a77822dc74fe448deaeaa39d68e Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 21 Mar 2024 11:05:55 -0500
Subject: [PATCH 171/222] uncomment the savefig

---
 examples/halfrange.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/halfrange.jl b/examples/halfrange.jl
index 6d9f6a69..eec945ab 100644
--- a/examples/halfrange.jl
+++ b/examples/halfrange.jl
@@ -56,7 +56,7 @@ for k in 1:10
     scatter!(λ, zero(λ); markersize=2.5, color=palette(:default)[k+1])
 end
 p
-#savefig(joinpath(GENFIGS, "halfrange.html"))
+savefig(joinpath(GENFIGS, "halfrange.html"))
 ###```@raw html
 ###<object type="text/html" data="../halfrange.html" style="width:100%;height:400px;"></object>
 ###```

From 04d8e8718ee41024fd47dd2aeddd028bc49f586b Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 21 Mar 2024 11:19:28 -0500
Subject: [PATCH 172/222] curtail large output of SymTridiagonal

---
 examples/halfrange.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/halfrange.jl b/examples/halfrange.jl
index eec945ab..7205b237 100644
--- a/examples/halfrange.jl
+++ b/examples/halfrange.jl
@@ -43,7 +43,7 @@ qvals = k-> ichebyshevtransform(q(k))
 
 # With the symmetric Jacobi matrix for $P_n^{(-\frac{1}{2}, 0)}(y)$ and the modified plan, we may compute the modified Jacobi matrix and the corresponding roots (as eigenvalues):
 XP = SymTridiagonal([-inv((4n-1)*(4n-5)) for n in 1:n+1], [4n*(2n-1)/(4n-1)/sqrt((4n-3)*(4n+1)) for n in 1:n])
-XQ = FastTransforms.modified_jacobi_matrix(P, XP)
+XQ = FastTransforms.modified_jacobi_matrix(P, XP);
 
 # And we plot:
 x = (chebyshevpoints(Float64, n+1, Val(1)) .+ 1 ) ./ 2

From 78c98fb664f60d9d9053a98db293cfd99567677b Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 21 Mar 2024 12:39:33 -0500
Subject: [PATCH 173/222] include banded diff

---
 examples/halfrange.jl | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/examples/halfrange.jl b/examples/halfrange.jl
index 7205b237..5fce0ea3 100644
--- a/examples/halfrange.jl
+++ b/examples/halfrange.jl
@@ -1,6 +1,6 @@
 # # Half-range Chebyshev polynomials
 # In [this paper](https://doi.org/10.1137/090752456), [Daan Huybrechs](https://github.com/daanhb) introduced the so-called half-range Chebyshev polynomials
-# as the non-classical orthogonal polynomials with respect to the inner product:
+# as the semi-classical orthogonal polynomials with respect to the inner product:
 # ```math
 # \langle f, g \rangle = \int_0^1 f(x) g(x)\frac{{\rm d} x}{\sqrt{1-x^2}}.
 # ```
@@ -19,10 +19,10 @@ const GENFIGS = joinpath(pkgdir(FastTransforms), "docs/src/generated")
 !isdir(GENFIGS) && mkdir(GENFIGS)
 plotlyjs()
 
-# We truncate the generating function to ensure an absolute error of `eps()`:
+# We truncate the generating function to ensure a relative error less than `eps()` in the uniform norm on $(-1,1)$:
 z = -1/(3+sqrt(8))
 K = sqrt(-2z)
-N = log(abs(z), eps()*(1-abs(z))/K) - 1
+N = ceil(Int, log(abs(z), eps()/2*(1-abs(z))/K) - 1)
 d = K .* z .^(0:N)
 
 # Then, we convert this representation to the expansion in Jacobi polynomials $P_n^{(-\frac{1}{2}, 0)}(y)$:
@@ -60,3 +60,16 @@ savefig(joinpath(GENFIGS, "halfrange.html"))
 ###```@raw html
 ###<object type="text/html" data="../halfrange.html" style="width:100%;height:400px;"></object>
 ###```
+
+# By [Theorem 2.20](https://arxiv.org/abs/2302.08448) it turns out that the *derivatives* of the half-range Chebyshev polynomials are a linear combination of at most two polynomials orthogonal with respect to $\sqrt{(3+y)(1-y)}(1+y)$ on $(-1,1)$. This fact enables us to compute the banded differentiation matrix:
+v̂ = 3*u+XP[1:N+1,1:N]*u
+v = jac2jac(v̂, -0.5, 0.0, 0.5, 1.0; norm1 = true, norm2 = true)
+function threshold!(A::AbstractArray, ϵ)
+    for i in eachindex(A)
+        if abs(A[i]) < ϵ A[i] = 0 end
+    end
+    A
+end
+P′ = plan_modifiedjac2jac(Float64, n+1, 0.5, 1.0, v)
+DP = UpperTriangular(diagm(1=>[sqrt(n*(n+1/2)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(-1/2,0)}(y) = P^{(1/2,1)}(y) D_P.
+DQ = UpperTriangular(threshold!(P′\(DP*(P*I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(y) = Q̂(y) D_Q.

From f0b71c6a9cb746234870905d07a50263b055fe8d Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 21 Mar 2024 12:50:57 -0500
Subject: [PATCH 174/222] add coefficients of the right sizes

fix output sizes
---
 examples/halfrange.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/examples/halfrange.jl b/examples/halfrange.jl
index 5fce0ea3..f5152742 100644
--- a/examples/halfrange.jl
+++ b/examples/halfrange.jl
@@ -43,7 +43,8 @@ qvals = k-> ichebyshevtransform(q(k))
 
 # With the symmetric Jacobi matrix for $P_n^{(-\frac{1}{2}, 0)}(y)$ and the modified plan, we may compute the modified Jacobi matrix and the corresponding roots (as eigenvalues):
 XP = SymTridiagonal([-inv((4n-1)*(4n-5)) for n in 1:n+1], [4n*(2n-1)/(4n-1)/sqrt((4n-3)*(4n+1)) for n in 1:n])
-XQ = FastTransforms.modified_jacobi_matrix(P, XP);
+XQ = FastTransforms.modified_jacobi_matrix(P, XP)
+SymTridiagonal(XQ.dv[1:10], XQ.ev[1:9])
 
 # And we plot:
 x = (chebyshevpoints(Float64, n+1, Val(1)) .+ 1 ) ./ 2
@@ -62,7 +63,7 @@ savefig(joinpath(GENFIGS, "halfrange.html"))
 ###```
 
 # By [Theorem 2.20](https://arxiv.org/abs/2302.08448) it turns out that the *derivatives* of the half-range Chebyshev polynomials are a linear combination of at most two polynomials orthogonal with respect to $\sqrt{(3+y)(1-y)}(1+y)$ on $(-1,1)$. This fact enables us to compute the banded differentiation matrix:
-v̂ = 3*u+XP[1:N+1,1:N]*u
+v̂ = 3*[u; 0]+XP[1:N+1, 1:N]*u
 v = jac2jac(v̂, -0.5, 0.0, 0.5, 1.0; norm1 = true, norm2 = true)
 function threshold!(A::AbstractArray, ϵ)
     for i in eachindex(A)
@@ -73,3 +74,4 @@ end
 P′ = plan_modifiedjac2jac(Float64, n+1, 0.5, 1.0, v)
 DP = UpperTriangular(diagm(1=>[sqrt(n*(n+1/2)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(-1/2,0)}(y) = P^{(1/2,1)}(y) D_P.
 DQ = UpperTriangular(threshold!(P′\(DP*(P*I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(y) = Q̂(y) D_Q.
+UpperTriangular(DQ[1:10,1:10])

From b2cd0e813c6730bbfec66c3b37b4340daf261fca Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 21 Mar 2024 13:04:58 -0500
Subject: [PATCH 175/222] update indexing

---
 examples/halfrange.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/halfrange.jl b/examples/halfrange.jl
index f5152742..e9de076a 100644
--- a/examples/halfrange.jl
+++ b/examples/halfrange.jl
@@ -63,7 +63,7 @@ savefig(joinpath(GENFIGS, "halfrange.html"))
 ###```
 
 # By [Theorem 2.20](https://arxiv.org/abs/2302.08448) it turns out that the *derivatives* of the half-range Chebyshev polynomials are a linear combination of at most two polynomials orthogonal with respect to $\sqrt{(3+y)(1-y)}(1+y)$ on $(-1,1)$. This fact enables us to compute the banded differentiation matrix:
-v̂ = 3*[u; 0]+XP[1:N+1, 1:N]*u
+v̂ = 3*[u; 0]+XP[1:N+2, 1:N+1]*u
 v = jac2jac(v̂, -0.5, 0.0, 0.5, 1.0; norm1 = true, norm2 = true)
 function threshold!(A::AbstractArray, ϵ)
     for i in eachindex(A)

From 9c379b92b6653a9e50ae38ee887073a749f0b555 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Sun, 28 Apr 2024 16:03:16 +0530
Subject: [PATCH 176/222] Fix type-promotion in BigInt transforms (#244)

---
 src/chebyshevtransform.jl | 4 ++--
 test/chebyshevtests.jl    | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 5303868e..7920e6d0 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -598,7 +598,7 @@ ichebyshevutransform(x, dims...; kwds...) = plan_ichebyshevutransform(x, dims...
 ## Code generation for integer inputs
 
 for func in (:chebyshevtransform,:ichebyshevtransform,:chebyshevutransform,:ichebyshevutransform)
-    @eval $func(x::AbstractVector{T}, dims...; kwds...) where {T<:Integer} = $func(convert(AbstractVector{Float64},x), dims...; kwds...)
+    @eval $func(x::AbstractVector{T}, dims...; kwds...) where {T<:Integer} = $func(convert(AbstractVector{float(T)},x), dims...; kwds...)
 end
 
 
@@ -742,4 +742,4 @@ for pln in (:plan_chebyshevtransform!, :plan_chebyshevtransform,
         $pln(x::AbstractArray, dims...; kws...) = $pln(x, Val(1), dims...; kws...)
         $pln(::Type{T}, szs, dims...; kwds...) where T = $pln(Array{T}(undef, szs...), dims...; kwds...)
     end
-end
\ No newline at end of file
+end
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 3112472d..614f9c6d 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -443,6 +443,10 @@ using FastTransforms, Test
         @test plan_chebyshevtransform!(x)copy(x) ≈ chebyshevtransform(x)
         @test plan_ichebyshevtransform!(x)copy(x) ≈ ichebyshevtransform(x)
     end
+    @testset "BigInt" begin
+        x = big(10)^400 .+ BigInt[1,2,3]
+        @test ichebyshevtransform(chebyshevtransform(x)) ≈ x
+    end
 
     @testset "immutable vectors" begin
         F = plan_chebyshevtransform([1.,2,3])

From 8caad8184c15c2e26d27643b864d56d96a92a899 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Tue, 7 May 2024 12:35:32 +0530
Subject: [PATCH 177/222] Add PkgEval badge to readme (#245)

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index ac08bd9e..0686794d 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # FastTransforms.jl
 
 [![Build Status](https://github.com/JuliaApproximation/FastTransforms.jl/workflows/CI/badge.svg)](https://github.com/JuliaApproximation/FastTransforms.jl/actions?query=workflow%3ACI) [![codecov](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl/branch/master/graph/badge.svg?token=BxTvSNgmLL)](https://codecov.io/gh/JuliaApproximation/FastTransforms.jl) [![](https://img.shields.io/badge/docs-stable-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/stable) [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaApproximation.github.io/FastTransforms.jl/dev)
+[![pkgeval](https://juliahub.com/docs/General/FastTransforms/stable/pkgeval.svg)](https://juliaci.github.io/NanosoldierReports/pkgeval_badges/report.html)
 
 `FastTransforms.jl` allows the user to conveniently work with orthogonal polynomials with degrees well into the millions.
 

From 28273776c3b8d804fe704715432e55971e89f1f9 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Tue, 7 May 2024 12:36:14 +0530
Subject: [PATCH 178/222] Remove unused leftover fixkind hack (#240)

---
 src/chebyshevtransform.jl | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index 7920e6d0..e98f4ed5 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -22,11 +22,9 @@ ChebyshevTransformPlan{T,kind}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T
 
 # jump through some hoops to make inferrable
 
-_fftKtype(::Val{N}, _...) where N = Vector{Int32}
-
 function plan_chebyshevtransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevTransformPlan{T,1,_fftKtype(Val{N}(), dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevTransformPlan{T,1}(FFTW.plan_r2r!(x, FIRSTKIND, dims...; kws...))
     end
@@ -39,7 +37,7 @@ end
 
 function plan_chebyshevtransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevTransformPlan{T,1,_fftKtype(Val{N}(), dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevTransformPlan{T,1}(FFTW.plan_r2r(x, FIRSTKIND, dims...; kws...))
     end
@@ -196,7 +194,7 @@ inv(P::IChebyshevTransformPlan{T,1}) where {T} = ChebyshevTransformPlan{T,1}(inv
 
 function plan_ichebyshevtransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevTransformPlan{T,1,_fftKtype(Val{N}(), dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevTransformPlan{T,1}(FFTW.plan_r2r!(x, IFIRSTKIND, dims...; kws...))
     end
@@ -208,7 +206,7 @@ end
 
 function plan_ichebyshevtransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevTransformPlan{T,1,_fftKtype(Val{N}(), dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevTransformPlan{T,1}(FFTW.plan_r2r(x, IFIRSTKIND, dims...; kws...))
     end
@@ -335,7 +333,7 @@ ChebyshevUTransformPlan{T,kind}(plan::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {
 
 function plan_chebyshevutransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevUTransformPlan{T,1,_fftKtype(Val{N}(), dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevUTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevUTransformPlan{T,1}(FFTW.plan_r2r!(x, UFIRSTKIND, dims...; kws...))
     end
@@ -347,7 +345,7 @@ end
 
 function plan_chebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        ChebyshevUTransformPlan{T,1,_fftKtype(Val{N}(), dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        ChebyshevUTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         ChebyshevUTransformPlan{T,1}(FFTW.plan_r2r(x, UFIRSTKIND, dims...; kws...))
     end
@@ -494,7 +492,7 @@ IChebyshevUTransformPlan{T,kind}(F::FFTW.r2rFFTWPlan{T,K,inplace,N,R}) where {T,
 
 function plan_ichebyshevutransform!(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevUTransformPlan{T,1,_fftKtype(Val{N}(), dims...),true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevUTransformPlan{T,1,Vector{Int32},true,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevUTransformPlan{T,1}(FFTW.plan_r2r!(x, IUFIRSTKIND, dims...; kws...))
     end
@@ -506,7 +504,7 @@ end
 
 function plan_ichebyshevutransform(x::AbstractArray{T,N}, ::Val{1}, dims...; kws...) where {T<:fftwNumber,N}
     if isempty(x)
-        IChebyshevUTransformPlan{T,1,_fftKtype(Val{N}(), dims...),false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
+        IChebyshevUTransformPlan{T,1,Vector{Int32},false,N,isempty(dims) ? NTuple{N,Int} : typeof(dims[1])}()
     else
         IChebyshevUTransformPlan{T,1}(FFTW.plan_r2r(x, IUFIRSTKIND, dims...; kws...))
     end

From da3e86580a8db4ca8a3d58a9fdeff6389676efef Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Tue, 7 May 2024 12:37:48 +0530
Subject: [PATCH 179/222] Use a mutable copy of input if inplace scaling is
 required (#243)

* Use a mutable copy of input if inplace scaling is required

* Convert to Array instead of using similar

* Add test

* Fix type-signature of _plan_mul
---
 src/chebyshevtransform.jl | 8 +++++---
 test/chebyshevtests.jl    | 1 +
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/chebyshevtransform.jl b/src/chebyshevtransform.jl
index e98f4ed5..2e58876d 100644
--- a/src/chebyshevtransform.jl
+++ b/src/chebyshevtransform.jl
@@ -49,8 +49,9 @@ end
 
 
 # convert x if necessary
-@inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::StridedArray{T}) where T = mul!(y, P, x)
-@inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::AbstractArray) where T = mul!(y, P, convert(Array{T}, x))
+_maybemutablecopy(x::StridedArray{T}, ::Type{T}) where {T} = x
+_maybemutablecopy(x, T) = Array{T}(x)
+@inline _plan_mul!(y::AbstractArray{T}, P::Plan{T}, x::AbstractArray) where T = mul!(y, P, _maybemutablecopy(x, T))
 
 
 for op in (:ldiv, :lmul)
@@ -309,7 +310,8 @@ function mul!(y::AbstractArray{T,N}, P::IChebyshevTransformPlan{T,2,K,false,N},
     _icheb2_rescale!(P.plan.region, y)
 end
 
-*(P::IChebyshevTransformPlan{T,kind,K,false,N}, x::AbstractArray{T,N}) where {T,kind,K,N} = mul!(similar(x), P, x)
+*(P::IChebyshevTransformPlan{T,kind,K,false,N}, x::AbstractArray{T,N}) where {T,kind,K,N} =
+    mul!(similar(x), P, _maybemutablecopy(x, T))
 ichebyshevtransform!(x::AbstractArray, dims...; kwds...) = plan_ichebyshevtransform!(x, dims...; kwds...)*x
 ichebyshevtransform(x, dims...; kwds...) = plan_ichebyshevtransform(x, dims...; kwds...)*x
 
diff --git a/test/chebyshevtests.jl b/test/chebyshevtests.jl
index 614f9c6d..763ac3ce 100644
--- a/test/chebyshevtests.jl
+++ b/test/chebyshevtests.jl
@@ -451,6 +451,7 @@ using FastTransforms, Test
     @testset "immutable vectors" begin
         F = plan_chebyshevtransform([1.,2,3])
         @test chebyshevtransform(1.0:3) == F * (1:3)
+        @test ichebyshevtransform(1.0:3) == ichebyshevtransform([1.0:3;])
     end
 
     @testset "inv" begin

From 17f3e594a94418dad8400f05c87dd03c8fbd9403 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 7 Jun 2024 14:03:45 -0500
Subject: [PATCH 180/222] add modified chebyshev generic routine

rename variables in STpH
---
 Project.toml                       |  2 +-
 src/SymmetricToeplitzPlusHankel.jl | 72 +++++++++++++++---------------
 src/libfasttransforms.jl           | 13 ++++++
 3 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/Project.toml b/Project.toml
index 97d36acb..1611c1f7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.16.1"
+version = "0.16.2"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/SymmetricToeplitzPlusHankel.jl b/src/SymmetricToeplitzPlusHankel.jl
index a9bc4013..e542326f 100644
--- a/src/SymmetricToeplitzPlusHankel.jl
+++ b/src/SymmetricToeplitzPlusHankel.jl
@@ -37,7 +37,7 @@ end
 bandwidths(A::SymmetricBandedToeplitzPlusHankel{T}) where T = (A.b, A.b)
 
 #
-# Jac*W-W*Jac' = G*J*G'
+# X'W-W*X = G*J*G'
 # This returns G and J, where J = [0 I; -I 0], respecting the skew-symmetry of the right-hand side.
 #
 function compute_skew_generators(A::SymmetricToeplitzPlusHankel{T}) where T
@@ -56,80 +56,82 @@ function cholesky(A::SymmetricToeplitzPlusHankel{T}) where T
     n = size(A, 1)
     G, J = compute_skew_generators(A)
     L = zeros(T, n, n)
-    r = A[:, 1]
-    r2 = zeros(T, n)
+    c = A[:, 1]
+    ĉ = zeros(T, n)
     l = zeros(T, n)
     v = zeros(T, n)
-    col1 = zeros(T, n)
-    STPHcholesky!(L, G, r, r2, l, v, col1, n)
+    row1 = zeros(T, n)
+    STPHcholesky!(L, G, c, ĉ, l, v, row1, n)
     return Cholesky(L, 'L', 0)
 end
 
-function STPHcholesky!(L::Matrix{T}, G, r, r2, l, v, col1, n) where T
+function STPHcholesky!(L::Matrix{T}, G, c, ĉ, l, v, row1, n) where T
     @inbounds @simd for k in 1:n-1
-        x = sqrt(r[1])
+        d = sqrt(c[1])
         for j in 1:n-k+1
-            L[j+k-1, k] = l[j] = r[j]/x
+            L[j+k-1, k] = l[j] = c[j]/d
         end
         for j in 1:n-k+1
-            v[j] = G[j, 1]*G[1,2]-G[j,2]*G[1,1]
+            v[j] = G[j, 1]*G[1, 2] - G[j, 2]*G[1, 1]
         end
-        F12 = k == 1 ? T(2) : T(1)
-        r2[1] = (r[2] - v[1])/F12
+        X21 = k == 1 ? T(2) : T(1)
+        ĉ[1] = (c[2] - v[1])/X21
         for j in 2:n-k
-            r2[j] = (r[j+1]+r[j-1] + r[1]*col1[j] - col1[1]*r[j] - v[j])/F12
+            ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j] - v[j])/X21
         end
-        r2[n-k+1] = (r[n-k] + r[1]*col1[n-k+1] - col1[1]*r[n-k+1] - v[n-k+1])/F12
-        cst = r[2]/x
+        ĉ[n-k+1] = (c[n-k] + c[1]*row1[n-k+1] - row1[1]*c[n-k+1] - v[n-k+1])/X21
+        cst = c[2]/d
         for j in 1:n-k
-            r[j] = r2[j+1] - cst*l[j+1]
+            c[j] = ĉ[j+1] - cst*l[j+1]
         end
+        cst = X21/d
         for j in 1:n-k
-            col1[j] = -F12/x*l[j+1]
+            row1[j] = -cst*l[j+1]
         end
-        c1 = G[1, 1]
-        c2 = G[1, 2]
+        gd1 = G[1, 1]/d
+        gd2 = G[1, 2]/d
         for j in 1:n-k
-            G[j, 1] = G[j+1, 1] - l[j+1]*c1/x
-            G[j, 2] = G[j+1, 2] - l[j+1]*c2/x
+            G[j, 1] = G[j+1, 1] - l[j+1]*gd1
+            G[j, 2] = G[j+1, 2] - l[j+1]*gd2
         end
     end
-    L[n, n] = sqrt(r[1])
+    L[n, n] = sqrt(c[1])
 end
 
 function cholesky(A::SymmetricBandedToeplitzPlusHankel{T}) where T
     n = size(A, 1)
     b = A.b
     R = BandedMatrix{T}(undef, (n, n), (0, bandwidth(A, 2)))
-    r = A[1:b+2, 1]
-    r2 = zeros(T, b+3)
+    c = A[1:b+2, 1]
+    ĉ = zeros(T, b+3)
     l = zeros(T, b+3)
-    col1 = zeros(T, b+2)
-    SBTPHcholesky!(R, r, r2, l, col1, n, b)
+    row1 = zeros(T, b+2)
+    SBTPHcholesky!(R, c, ĉ, l, row1, n, b)
     return Cholesky(R, 'U', 0)
 end
 
-function SBTPHcholesky!(R::BandedMatrix{T}, r, r2, l, col1, n, b) where T
+function SBTPHcholesky!(R::BandedMatrix{T}, c, ĉ, l, row1, n, b) where T
     @inbounds @simd for k in 1:n
-        x = sqrt(r[1])
+        d = sqrt(c[1])
         for j in 1:b+1
-            l[j] = r[j]/x
+            l[j] = c[j]/d
         end
         for j in 1:min(n-k+1, b+1)
             R[k, j+k-1] = l[j]
         end
-        F12 = k == 1 ? T(2) : T(1)
-        r2[1] = r[2]/F12
+        X21 = k == 1 ? T(2) : T(1)
+        ĉ[1] = c[2]/X21
         for j in 2:b+1
-            r2[j] = (r[j+1]+r[j-1] + r[1]*col1[j] - col1[1]*r[j])/F12
+            ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j])/X21
         end
-        r2[b+2] = (r[b+1] + r[1]*col1[b+2] - col1[1]*r[b+2])/F12
-        cst = r[2]/x
+        ĉ[b+2] = (c[b+1] + c[1]*row1[b+2] - row1[1]*c[b+2])/X21
+        cst = c[2]/d
         for j in 1:b+2
-            r[j] = r2[j+1] - cst*l[j+1]
+            c[j] = ĉ[j+1] - cst*l[j+1]
         end
+        cst = X21/d
         for j in 1:b+2
-            col1[j] = -F12/x*l[j+1]
+            row1[j] = -cst*l[j+1]
         end
     end
 end
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index c1856cbf..ea724ce9 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -1250,3 +1250,16 @@ for (fC, T) in ((:execute_jacobi_similarityf, Float32), (:execute_jacobi_similar
         end
     end
 end
+
+function modified_jacobi_matrix(R, XP)
+    n = size(R, 1) - 1
+    XQ = SymTridiagonal(zeros(n), zeros(n-1))
+    XQ.dv[1] = (R[1, 1]*XP[1, 1] + R[1, 2]*XP[2, 1])/R[1, 1]
+    for i in 1:n-1
+        XQ.ev[i] = R[i+1, i+1]*XP[i+1, i]/R[i, i]
+    end
+    for i in 2:n
+        XQ.dv[i] = (R[i, i]*XP[i,i] + R[i, i+1]*XP[i+1, i] - XQ[i, i-1]*R[i-1, i])/R[i, i]
+    end
+    return XQ
+end

From 8f58d059b7a52fbc6fd5a9456be503c592faefb7 Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Thu, 20 Jun 2024 01:05:26 +0530
Subject: [PATCH 181/222] Disable fail-fast in CI

This will run all tests, even if some of them fail. This will help us recognize failures on specific platforms.
---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f926658f..4224867f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,6 +7,7 @@ jobs:
     name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
     runs-on: ${{ matrix.os }}
     strategy:
+      fail-fast: false
       matrix:
         version:
           - '1.7'

From e5cc4c4a38455a22ceb8dd0c765d6a6d1ee3076e Mon Sep 17 00:00:00 2001
From: Jishnu Bhattacharya <jishnub.github@gmail.com>
Date: Fri, 21 Jun 2024 09:29:48 +0530
Subject: [PATCH 182/222] Bump version to v0.16.3 (#247)

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 1611c1f7..678e3dd7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.16.2"
+version = "0.16.3"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From d8cceddd9e9b0829602c2a830de2588655754ea6 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 12 Jul 2024 15:28:24 -0500
Subject: [PATCH 183/222] add FastCholesky for any OPs

X'W-WX == GJG', rank(G) = 2. This allows W to be Cholesky-factored in O(n^2).
---
 Project.toml                             |   2 +-
 src/SymmetricToeplitzPlusHankel.jl       | 136 ++++++++++++++++++-----
 test/symmetrictoeplitzplushankeltests.jl |  12 ++
 3 files changed, 120 insertions(+), 30 deletions(-)

diff --git a/Project.toml b/Project.toml
index 678e3dd7..a9269fe0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.16.3"
+version = "0.16.4"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/SymmetricToeplitzPlusHankel.jl b/src/SymmetricToeplitzPlusHankel.jl
index e542326f..1084977b 100644
--- a/src/SymmetricToeplitzPlusHankel.jl
+++ b/src/SymmetricToeplitzPlusHankel.jl
@@ -38,23 +38,23 @@ bandwidths(A::SymmetricBandedToeplitzPlusHankel{T}) where T = (A.b, A.b)
 
 #
 # X'W-W*X = G*J*G'
-# This returns G and J, where J = [0 I; -I 0], respecting the skew-symmetry of the right-hand side.
+# This returns G, where J = [0 1; -1 0], respecting the skew-symmetry of the right-hand side.
 #
 function compute_skew_generators(A::SymmetricToeplitzPlusHankel{T}) where T
     v = A.v
     n = size(A, 1)
-    J = [zero(T) one(T); -one(T) zero(T)]
     G = zeros(T, n, 2)
     G[n, 1] = one(T)
-    u2 = reverse(v[2:n+1])
-    u2[1:n-1] .+= v[n+1:2n-1]
-    G[:, 2] .= -u2
-    G, J
+    @inbounds @simd for j in 1:n-1
+        G[j, 2] = -(v[n+2-j] + v[n+j])
+    end
+    G[n, 2] = -v[2]
+    G
 end
 
 function cholesky(A::SymmetricToeplitzPlusHankel{T}) where T
     n = size(A, 1)
-    G, J = compute_skew_generators(A)
+    G = compute_skew_generators(A)
     L = zeros(T, n, n)
     c = A[:, 1]
     ĉ = zeros(T, n)
@@ -74,20 +74,29 @@ function STPHcholesky!(L::Matrix{T}, G, c, ĉ, l, v, row1, n) where T
         for j in 1:n-k+1
             v[j] = G[j, 1]*G[1, 2] - G[j, 2]*G[1, 1]
         end
-        X21 = k == 1 ? T(2) : T(1)
-        ĉ[1] = (c[2] - v[1])/X21
-        for j in 2:n-k
-            ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j] - v[j])/X21
+        if k == 1
+            for j in 2:n-k
+                ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j] - v[j])/2
+            end
+            ĉ[n-k+1] = (c[n-k] + c[1]*row1[n-k+1] - row1[1]*c[n-k+1] - v[n-k+1])/2
+            cst = 2/d
+            for j in 1:n-k
+                row1[j] = -cst*l[j+1]
+            end
+        else
+            for j in 2:n-k
+                ĉ[j] = c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j] - v[j]
+            end
+            ĉ[n-k+1] = c[n-k] + c[1]*row1[n-k+1] - row1[1]*c[n-k+1] - v[n-k+1]
+            cst = 1/d
+            for j in 1:n-k
+                row1[j] = -cst*l[j+1]
+            end
         end
-        ĉ[n-k+1] = (c[n-k] + c[1]*row1[n-k+1] - row1[1]*c[n-k+1] - v[n-k+1])/X21
         cst = c[2]/d
         for j in 1:n-k
             c[j] = ĉ[j+1] - cst*l[j+1]
         end
-        cst = X21/d
-        for j in 1:n-k
-            row1[j] = -cst*l[j+1]
-        end
         gd1 = G[1, 1]/d
         gd2 = G[1, 2]/d
         for j in 1:n-k
@@ -101,37 +110,106 @@ end
 function cholesky(A::SymmetricBandedToeplitzPlusHankel{T}) where T
     n = size(A, 1)
     b = A.b
-    R = BandedMatrix{T}(undef, (n, n), (0, bandwidth(A, 2)))
+    L = BandedMatrix{T}(undef, (n, n), (bandwidth(A, 1), 0))
     c = A[1:b+2, 1]
     ĉ = zeros(T, b+3)
     l = zeros(T, b+3)
     row1 = zeros(T, b+2)
-    SBTPHcholesky!(R, c, ĉ, l, row1, n, b)
-    return Cholesky(R, 'U', 0)
+    SBTPHcholesky!(L, c, ĉ, l, row1, n, b)
+    return Cholesky(L, 'L', 0)
 end
 
-function SBTPHcholesky!(R::BandedMatrix{T}, c, ĉ, l, row1, n, b) where T
+function SBTPHcholesky!(L::BandedMatrix{T}, c, ĉ, l, row1, n, b) where T
     @inbounds @simd for k in 1:n
         d = sqrt(c[1])
         for j in 1:b+1
             l[j] = c[j]/d
         end
         for j in 1:min(n-k+1, b+1)
-            R[k, j+k-1] = l[j]
+            L[j+k-1, k] = l[j]
         end
-        X21 = k == 1 ? T(2) : T(1)
-        ĉ[1] = c[2]/X21
-        for j in 2:b+1
-            ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j])/X21
+        if k == 1
+            for j in 2:b+1
+                ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j])/2
+            end
+            ĉ[b+2] = (c[b+1] + c[1]*row1[b+2] - row1[1]*c[b+2])/2
+            cst = 2/d
+            for j in 1:b+2
+                row1[j] = -cst*l[j+1]
+            end
+        else
+            for j in 2:b+1
+                ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j])
+            end
+            ĉ[b+2] = (c[b+1] + c[1]*row1[b+2] - row1[1]*c[b+2])
+            cst = 1/d
+            for j in 1:b+2
+                row1[j] = -cst*l[j+1]
+            end
         end
-        ĉ[b+2] = (c[b+1] + c[1]*row1[b+2] - row1[1]*c[b+2])/X21
         cst = c[2]/d
         for j in 1:b+2
             c[j] = ĉ[j+1] - cst*l[j+1]
         end
-        cst = X21/d
-        for j in 1:b+2
-            row1[j] = -cst*l[j+1]
+    end
+end
+
+
+
+#
+# X'W-W*X = G*J*G'
+# This returns G, where J = [0 1; -1 0], respecting the skew-symmetry of the right-hand side.
+#
+function compute_skew_generators(W::Symmetric{T, <: AbstractMatrix{T}}, X::Tridiagonal{T, Vector{T}}) where T
+    @assert size(W) == size(X)
+    m, n = size(W)
+    G = zeros(T, n, 2)
+    G[n, 1] = one(T)
+    G[:, 2] .= W[n-1, :]*X[n-1, n] - X'W[:, n]
+    return G
+end
+
+function fastcholesky(W::Symmetric{T, <: AbstractMatrix{T}}, X::Tridiagonal{T, Vector{T}}) where T
+    n = size(W, 1)
+    G = compute_skew_generators(W, X)
+    L = zeros(T, n, n)
+    c = W[:, 1]
+    ĉ = zeros(T, n)
+    l = zeros(T, n)
+    v = zeros(T, n)
+    row1 = zeros(T, n)
+    fastcholesky!(L, X, G, c, ĉ, l, v, row1, n)
+    return Cholesky(L, 'L', 0)
+end
+
+
+function fastcholesky!(L::Matrix{T}, X::Tridiagonal{T, Vector{T}}, G, c, ĉ, l, v, row1, n) where T
+    @inbounds @simd for k in 1:n-1
+        d = sqrt(c[k])
+        for j in k:n
+            L[j, k] = l[j] = c[j]/d
+        end
+        for j in k:n
+            v[j] = G[j, 1]*G[k, 2] - G[j, 2]*G[k, 1]
+        end
+        for j in k+1:n-1
+            ĉ[j] = (X[j-1, j]*c[j-1] + (X[j, j]-X[k, k])*c[j] + X[j+1, j]*c[j+1] + c[k]*row1[j] - row1[k]*c[j] - v[j])/X[k+1, k]
+        end
+        ĉ[n] = (X[n-1, n]*c[n-1] + (X[n, n]-X[k, k])*c[n] + c[k]*row1[n] - row1[k]*c[n] - v[n])/X[k+1, k]
+        cst = X[k+1, k]/d
+        for j in k+1:n
+            row1[j] = -cst*l[j]
+        end
+        cst = c[k+1]/d
+        for j in k:n
+            c[j] = ĉ[j] - cst*l[j]
+        end
+        gd1 = G[k, 1]/d
+        gd2 = G[k, 2]/d
+        for j in k:n
+            G[j, 1] -= l[j]*gd1
+            G[j, 2] -= l[j]*gd2
         end
     end
+    L[n, n] = sqrt(c[n])
 end
diff --git a/test/symmetrictoeplitzplushankeltests.jl b/test/symmetrictoeplitzplushankeltests.jl
index c26c38d3..8f37e775 100644
--- a/test/symmetrictoeplitzplushankeltests.jl
+++ b/test/symmetrictoeplitzplushankeltests.jl
@@ -37,3 +37,15 @@ end
         @test R ≈ U
     end
 end
+
+@testset "Fast Cholesky" begin
+    n = 128
+    for T in (Float32, Float64, BigFloat)
+        R = plan_leg2cheb(T, n; normcheb=true)*I
+        X = Tridiagonal([T(n)/(2n-1) for n in 1:n-1], zeros(T, n), [T(n)/(2n+1) for n in 1:n-1]) # Legendre X
+        W = Symmetric(R'R)
+        F = FastTransforms.fastcholesky(W, X)
+        @test F.L*F.L' ≈ W
+        @test F.U ≈ R
+    end
+end

From 04556c54e8efc689dd1ff98826bdae2e54a365e1 Mon Sep 17 00:00:00 2001
From: ioannisPApapadopoulos <john.papadopoulos0@gmail.com>
Date: Fri, 6 Sep 2024 10:06:50 +0200
Subject: [PATCH 184/222] * and \ for FTPlan applied to an Array

---
 src/FastTransforms.jl |  1 +
 src/arrays.jl         | 48 +++++++++++++++++++++++++++++++++++++++++++
 test/arraystests.jl   | 23 +++++++++++++++++++++
 test/runtests.jl      |  1 +
 4 files changed, 73 insertions(+)
 create mode 100644 src/arrays.jl
 create mode 100644 test/arraystests.jl

diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 82c34990..fe23f46e 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -128,6 +128,7 @@ for f in (:jac2jac,
     @eval $f(x::AbstractArray, y...; z...) = $lib_f(x, y...; z...)
 end
 
+include("arrays.jl")
 # following use Toeplitz-Hankel to avoid expensive plans
 # for f in (:leg2cheb, :cheb2leg, :ultra2ultra)
 #     th_f = Symbol("th_", f)
diff --git a/src/arrays.jl b/src/arrays.jl
new file mode 100644
index 00000000..1cc18ec9
--- /dev/null
+++ b/src/arrays.jl
@@ -0,0 +1,48 @@
+struct ArrayPlan{T, F<:FTPlan{<:T}, Szs<:Tuple, Dims<:Tuple{<:Int}} <: Plan{T}
+    F::F
+    szs::Szs
+    dims::Dims
+end
+size(P::ArrayPlan, k...) = P.szs[[k...]]
+
+function ArrayPlan(F::FTPlan{<:T}, c::AbstractArray{T}, dims::Tuple{<:Int}=(1,)) where T
+    szs = size(c)
+    @assert F.n == szs[dims[1]]
+    ArrayPlan(F, size(c), dims)
+end
+
+function inv_perm(d::Vector{<:Int})
+    inv_d = Vector{Int}(undef, length(d))
+    for (i, val) in enumerate(d)
+        inv_d[val] = i
+    end
+    return inv_d
+end
+
+function *(P::ArrayPlan, f::AbstractArray)
+    F, dims, szs = P.F, P.dims, P.szs
+    @assert length(dims) == 1
+    @assert szs == size(f)
+    d = first(dims)
+
+    perm = [d; setdiff(1:ndims(f), d)]
+    fp = permutedims(f, perm)
+
+    fr = reshape(fp, size(fp,1), prod(size(fp)[2:end]))
+
+    permutedims(reshape(F*fr, size(fp)...), inv_perm(perm))
+end
+
+function \(P::ArrayPlan, f::AbstractArray)
+    F, dims, szs = P.F, P.dims, P.szs
+    @assert length(dims) == 1
+    @assert szs == size(f)
+    d = first(dims)
+
+    perm = [d; setdiff(1:ndims(f), d)]
+    fp = permutedims(f, perm)
+
+    fr = reshape(fp, size(fp,1), prod(size(fp)[2:end]))
+
+    permutedims(reshape(F\fr, size(fp)...), inv_perm(perm))
+end
\ No newline at end of file
diff --git a/test/arraystests.jl b/test/arraystests.jl
new file mode 100644
index 00000000..ca024a64
--- /dev/null
+++ b/test/arraystests.jl
@@ -0,0 +1,23 @@
+using FastTransforms, Test
+import FastTransforms: ArrayPlan
+
+@testset "Array transform"  begin
+    c = randn(5,20,10)
+    F = plan_cheb2leg(c)
+    FT = ArrayPlan(F, c)
+
+    f = similar(c);
+    for k in axes(c,3)
+        f[:,:,k] = (F*c[:,:,k])
+    end
+    @test f ≈ FT*c
+    @test c ≈ FT\f
+
+    F = plan_cheb2leg(Vector{Float64}(axes(c,2)))
+    FT = ArrayPlan(F, c, (2,))
+    for k in axes(c,3)
+        f[:,:,k] = (F*c[:,:,k]')'
+    end
+    @test f ≈ FT*c
+    @test c ≈ FT\f
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index de16f36b..a4881c3b 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -12,3 +12,4 @@ include("clenshawtests.jl")
 include("toeplitzplanstests.jl")
 include("toeplitzhankeltests.jl")
 include("symmetrictoeplitzplushankeltests.jl")
+include("arraystests.jl")
\ No newline at end of file

From 16b6f4356c31b4c16a14c130777e6b52657fea46 Mon Sep 17 00:00:00 2001
From: ioannisPApapadopoulos <john.papadopoulos0@gmail.com>
Date: Fri, 6 Sep 2024 11:36:05 +0200
Subject: [PATCH 185/222] N-dimensional plan

---
 src/arrays.jl       | 48 ++++++++++++++++++++++++++++--
 test/arraystests.jl | 72 +++++++++++++++++++++++++++++++++++----------
 2 files changed, 102 insertions(+), 18 deletions(-)

diff --git a/src/arrays.jl b/src/arrays.jl
index 1cc18ec9..5388bba1 100644
--- a/src/arrays.jl
+++ b/src/arrays.jl
@@ -1,8 +1,10 @@
-struct ArrayPlan{T, F<:FTPlan{<:T}, Szs<:Tuple, Dims<:Tuple{<:Int}} <: Plan{T}
-    F::F
+struct ArrayPlan{T, FF<:FTPlan{<:T}, Szs<:Tuple, Dims<:Tuple{<:Int}} <: Plan{T}
+    F::FF
     szs::Szs
     dims::Dims
 end
+size(P::ArrayPlan) = P.szs
+size(P::ArrayPlan, k::Int) = P.szs[k]
 size(P::ArrayPlan, k...) = P.szs[[k...]]
 
 function ArrayPlan(F::FTPlan{<:T}, c::AbstractArray{T}, dims::Tuple{<:Int}=(1,)) where T
@@ -18,6 +20,7 @@ function inv_perm(d::Vector{<:Int})
     end
     return inv_d
 end
+inv_perm(d::Tuple) = inv_perm([d...])
 
 function *(P::ArrayPlan, f::AbstractArray)
     F, dims, szs = P.F, P.dims, P.szs
@@ -45,4 +48,45 @@ function \(P::ArrayPlan, f::AbstractArray)
     fr = reshape(fp, size(fp,1), prod(size(fp)[2:end]))
 
     permutedims(reshape(F\fr, size(fp)...), inv_perm(perm))
+end
+
+struct NDimsPlan{T, FF<:ArrayPlan{<:T}, Dims<:Tuple} <: Plan{T}
+    F::FF
+    dims::Dims
+    function NDimsPlan(F, dims)
+        if length(Set(size(F, dims...))) > 1
+            error("Different size in dims axes not yet implemented in N-dimensional transform.")
+        end
+        new{eltype(F), typeof(F), typeof(dims)}(F, dims)
+    end
+end
+
+size(P::NDimsPlan) = size(P.F)
+size(P::NDimsPlan, k::Int) = size(P.F, k)
+size(P::NDimsPlan, k...) = size(P.F, k...)
+
+function *(P::NDimsPlan, f::AbstractArray)
+    F, dims = P.F, P.dims
+    @assert size(F) == size(f)
+    g = copy(f)
+    t = 1:ndims(g)
+    for d in dims
+        perm = ntuple(k -> k == 1 ? t[d] : k == d ? t[1] : t[k], length(t))
+        gp = permutedims(g, perm)
+        g = permutedims(F*gp, inv_perm(perm))
+    end
+    return g
+end
+
+function \(P::NDimsPlan, f::AbstractArray)
+    F, dims = P.F, P.dims
+    @assert size(F) == size(f)
+    g = copy(f)
+    t = 1:ndims(g)
+    for d in dims
+        perm = ntuple(k -> k == 1 ? t[d] : k == d ? t[1] : t[k], length(t))
+        gp = permutedims(g, perm)
+        g = permutedims(F\gp, inv_perm(perm))
+    end
+    return g
 end
\ No newline at end of file
diff --git a/test/arraystests.jl b/test/arraystests.jl
index ca024a64..e8c84cce 100644
--- a/test/arraystests.jl
+++ b/test/arraystests.jl
@@ -1,23 +1,63 @@
 using FastTransforms, Test
-import FastTransforms: ArrayPlan
+import FastTransforms: ArrayPlan, NDimsPlan
 
 @testset "Array transform"  begin
-    c = randn(5,20,10)
-    F = plan_cheb2leg(c)
-    FT = ArrayPlan(F, c)
+    @testset "ArrayPlan" begin
+        c = randn(5,20,10)
+        F = plan_cheb2leg(c)
+        FT = ArrayPlan(F, c)
 
-    f = similar(c);
-    for k in axes(c,3)
-        f[:,:,k] = (F*c[:,:,k])
+        @test size(FT) == size(c)
+        @test size(FT,1) == size(c,1)
+        @test size(FT,1,2) == (size(c,1), size(c,2))
+
+        f = similar(c);
+        for k in axes(c,3)
+            f[:,:,k] = (F*c[:,:,k])
+        end
+        @test f ≈ FT*c
+        @test c ≈ FT\f
+
+        F = plan_cheb2leg(Vector{Float64}(axes(c,2)))
+        FT = ArrayPlan(F, c, (2,))
+        for k in axes(c,3)
+            f[:,:,k] = (F*c[:,:,k]')'
+        end
+        @test f ≈ FT*c
+        @test c ≈ FT\f
     end
-    @test f ≈ FT*c
-    @test c ≈ FT\f
 
-    F = plan_cheb2leg(Vector{Float64}(axes(c,2)))
-    FT = ArrayPlan(F, c, (2,))
-    for k in axes(c,3)
-        f[:,:,k] = (F*c[:,:,k]')'
+    @testset "NDimsPlan" begin
+        c = randn(20,10,20)
+        @test_throws ErrorException("Different size in dims axes not yet implemented in N-dimensional transform.") NDimsPlan(ArrayPlan(plan_cheb2leg(c), c), (1,2))        
+
+        c = randn(20,20,5);
+        F = plan_cheb2leg(c)
+        FT = ArrayPlan(F, c)
+        P = NDimsPlan(FT, (1,2))
+
+        @test size(P) == size(c)
+        @test size(P,1) == size(c,1)
+        @test size(P,1,2) == (size(c,1), size(c,2))
+
+
+        f = similar(c);
+        for k in axes(f,3)
+            f[:,:,k] = (F*(F*c[:,:,k])')'
+        end
+        @test f ≈ P*c
+        @test c ≈ P\f
+
+        c = randn(10,5,10,60)
+        F = plan_cheb2leg(c)
+        P = NDimsPlan(ArrayPlan(F, c), (1,3))
+        f = similar(c)
+        for i in axes(f,2), j in axes(f,4)
+            f[:,i,:,j] = (F*(F*c[:,i,:,j])')'
+        end
+        @test f ≈ P*c
+        @test c ≈ P\f
     end
-    @test f ≈ FT*c
-    @test c ≈ FT\f
-end
\ No newline at end of file
+end
+
+

From 87b1924c78bf027bf6c31cab6f68348e35189f6e Mon Sep 17 00:00:00 2001
From: ioannisPApapadopoulos <john.papadopoulos0@gmail.com>
Date: Fri, 6 Sep 2024 11:53:50 +0200
Subject: [PATCH 186/222] type-inference

---
 src/arrays.jl       | 4 ++--
 test/arraystests.jl | 7 ++++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/arrays.jl b/src/arrays.jl
index 5388bba1..2b3843b9 100644
--- a/src/arrays.jl
+++ b/src/arrays.jl
@@ -71,7 +71,7 @@ function *(P::NDimsPlan, f::AbstractArray)
     g = copy(f)
     t = 1:ndims(g)
     for d in dims
-        perm = ntuple(k -> k == 1 ? t[d] : k == d ? t[1] : t[k], length(t))
+        perm = ntuple(k -> k == 1 ? t[d] : k == d ? t[1] : t[k], ndims(g))
         gp = permutedims(g, perm)
         g = permutedims(F*gp, inv_perm(perm))
     end
@@ -84,7 +84,7 @@ function \(P::NDimsPlan, f::AbstractArray)
     g = copy(f)
     t = 1:ndims(g)
     for d in dims
-        perm = ntuple(k -> k == 1 ? t[d] : k == d ? t[1] : t[k], length(t))
+        perm = ntuple(k -> k == 1 ? t[d] : k == d ? t[1] : t[k], ndims(g))
         gp = permutedims(g, perm)
         g = permutedims(F\gp, inv_perm(perm))
     end
diff --git a/test/arraystests.jl b/test/arraystests.jl
index e8c84cce..2db0a756 100644
--- a/test/arraystests.jl
+++ b/test/arraystests.jl
@@ -31,6 +31,12 @@ import FastTransforms: ArrayPlan, NDimsPlan
         c = randn(20,10,20)
         @test_throws ErrorException("Different size in dims axes not yet implemented in N-dimensional transform.") NDimsPlan(ArrayPlan(plan_cheb2leg(c), c), (1,2))        
 
+        c = randn(5,20)
+        F = plan_cheb2leg(c)
+        FT = ArrayPlan(F, c)
+        P = NDimsPlan(FT, (1,))
+        @test F*c ≈ FT*c ≈ P*c
+
         c = randn(20,20,5);
         F = plan_cheb2leg(c)
         FT = ArrayPlan(F, c)
@@ -40,7 +46,6 @@ import FastTransforms: ArrayPlan, NDimsPlan
         @test size(P,1) == size(c,1)
         @test size(P,1,2) == (size(c,1), size(c,2))
 
-
         f = similar(c);
         for k in axes(f,3)
             f[:,:,k] = (F*(F*c[:,:,k])')'

From e0dd9d6c1e8d9f9ac128ca1d80fce53a1bbd4a67 Mon Sep 17 00:00:00 2001
From: ioannisPApapadopoulos <john.papadopoulos0@gmail.com>
Date: Fri, 6 Sep 2024 12:48:18 +0200
Subject: [PATCH 187/222] fix N-dimensional transform when dims[1] != 1

---
 src/arrays.jl       | 33 ++++++++++++++++++++-------------
 test/arraystests.jl | 16 ++++++++--------
 2 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/src/arrays.jl b/src/arrays.jl
index 2b3843b9..9c19f127 100644
--- a/src/arrays.jl
+++ b/src/arrays.jl
@@ -31,7 +31,7 @@ function *(P::ArrayPlan, f::AbstractArray)
     perm = [d; setdiff(1:ndims(f), d)]
     fp = permutedims(f, perm)
 
-    fr = reshape(fp, size(fp,1), prod(size(fp)[2:end]))
+    fr = reshape(fp, size(fp,1), :)
 
     permutedims(reshape(F*fr, size(fp)...), inv_perm(perm))
 end
@@ -45,33 +45,39 @@ function \(P::ArrayPlan, f::AbstractArray)
     perm = [d; setdiff(1:ndims(f), d)]
     fp = permutedims(f, perm)
 
-    fr = reshape(fp, size(fp,1), prod(size(fp)[2:end]))
+    fr = reshape(fp, size(fp,1), :)
 
     permutedims(reshape(F\fr, size(fp)...), inv_perm(perm))
 end
 
-struct NDimsPlan{T, FF<:ArrayPlan{<:T}, Dims<:Tuple} <: Plan{T}
+struct NDimsPlan{T, FF<:ArrayPlan{<:T}, Szs<:Tuple, Dims<:Tuple} <: Plan{T}
     F::FF
+    szs::Szs
     dims::Dims
-    function NDimsPlan(F, dims)
-        if length(Set(size(F, dims...))) > 1
+    function NDimsPlan(F, szs, dims)
+        if length(Set(szs[[dims...]])) > 1
             error("Different size in dims axes not yet implemented in N-dimensional transform.")
         end
-        new{eltype(F), typeof(F), typeof(dims)}(F, dims)
+        new{eltype(F), typeof(F), typeof(szs), typeof(dims)}(F, szs, dims)
     end
 end
 
-size(P::NDimsPlan) = size(P.F)
-size(P::NDimsPlan, k::Int) = size(P.F, k)
-size(P::NDimsPlan, k...) = size(P.F, k...)
+size(P::NDimsPlan) = P.szs
+size(P::NDimsPlan, k::Int) = P.szs[k]
+size(P::NDimsPlan, k...) = P.szs[[k...]]
+
+function NDimsPlan(F::FTPlan, szs::Tuple, dims::Tuple)
+    NDimsPlan(ArrayPlan(F, szs, (first(dims),)), szs, dims)
+end
 
 function *(P::NDimsPlan, f::AbstractArray)
     F, dims = P.F, P.dims
-    @assert size(F) == size(f)
+    @assert size(P) == size(f)
     g = copy(f)
     t = 1:ndims(g)
+    d1 = dims[1]
     for d in dims
-        perm = ntuple(k -> k == 1 ? t[d] : k == d ? t[1] : t[k], ndims(g))
+        perm = ntuple(k -> k == d1 ? t[d] : k == d ? t[d1] : t[k], ndims(g))
         gp = permutedims(g, perm)
         g = permutedims(F*gp, inv_perm(perm))
     end
@@ -80,11 +86,12 @@ end
 
 function \(P::NDimsPlan, f::AbstractArray)
     F, dims = P.F, P.dims
-    @assert size(F) == size(f)
+    @assert size(P) == size(f)
     g = copy(f)
     t = 1:ndims(g)
+    d1 = dims[1]
     for d in dims
-        perm = ntuple(k -> k == 1 ? t[d] : k == d ? t[1] : t[k], ndims(g))
+        perm = ntuple(k -> k == d1 ? t[d] : k == d ? t[d1] : t[k], ndims(g))
         gp = permutedims(g, perm)
         g = permutedims(F\gp, inv_perm(perm))
     end
diff --git a/test/arraystests.jl b/test/arraystests.jl
index 2db0a756..ea045343 100644
--- a/test/arraystests.jl
+++ b/test/arraystests.jl
@@ -29,18 +29,18 @@ import FastTransforms: ArrayPlan, NDimsPlan
 
     @testset "NDimsPlan" begin
         c = randn(20,10,20)
-        @test_throws ErrorException("Different size in dims axes not yet implemented in N-dimensional transform.") NDimsPlan(ArrayPlan(plan_cheb2leg(c), c), (1,2))        
+        @test_throws ErrorException("Different size in dims axes not yet implemented in N-dimensional transform.") NDimsPlan(ArrayPlan(plan_cheb2leg(c), c), size(c), (1,2))        
 
         c = randn(5,20)
         F = plan_cheb2leg(c)
         FT = ArrayPlan(F, c)
-        P = NDimsPlan(FT, (1,))
+        P = NDimsPlan(F, size(c), (1,))
         @test F*c ≈ FT*c ≈ P*c
 
         c = randn(20,20,5);
         F = plan_cheb2leg(c)
         FT = ArrayPlan(F, c)
-        P = NDimsPlan(FT, (1,2))
+        P = NDimsPlan(FT, size(c), (1,2))
 
         @test size(P) == size(c)
         @test size(P,1) == size(c,1)
@@ -53,12 +53,12 @@ import FastTransforms: ArrayPlan, NDimsPlan
         @test f ≈ P*c
         @test c ≈ P\f
 
-        c = randn(10,5,10,60)
-        F = plan_cheb2leg(c)
-        P = NDimsPlan(ArrayPlan(F, c), (1,3))
+        c = randn(5,10,10,60)
+        F = plan_cheb2leg(randn(10))
+        P = NDimsPlan(F, size(c), (2,3))
         f = similar(c)
-        for i in axes(f,2), j in axes(f,4)
-            f[:,i,:,j] = (F*(F*c[:,i,:,j])')'
+        for i in axes(f,1), j in axes(f,4)
+            f[i,:,:,j] = (F*(F*c[i,:,:,j])')'
         end
         @test f ≈ P*c
         @test c ≈ P\f

From 5bdb5e29334407242e9e996d33863a84e5f5f9ff Mon Sep 17 00:00:00 2001
From: ioannisPApapadopoulos <john.papadopoulos0@gmail.com>
Date: Fri, 6 Sep 2024 13:21:18 +0200
Subject: [PATCH 188/222] perm Vector -> perm Tuple

---
 src/arrays.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/arrays.jl b/src/arrays.jl
index 9c19f127..803319b5 100644
--- a/src/arrays.jl
+++ b/src/arrays.jl
@@ -28,7 +28,7 @@ function *(P::ArrayPlan, f::AbstractArray)
     @assert szs == size(f)
     d = first(dims)
 
-    perm = [d; setdiff(1:ndims(f), d)]
+    perm = (d, ntuple(i-> i + (i >= d), ndims(f) -1)...)
     fp = permutedims(f, perm)
 
     fr = reshape(fp, size(fp,1), :)
@@ -42,7 +42,7 @@ function \(P::ArrayPlan, f::AbstractArray)
     @assert szs == size(f)
     d = first(dims)
 
-    perm = [d; setdiff(1:ndims(f), d)]
+    perm = (d, ntuple(i-> i + (i >= d), ndims(f) -1)...)
     fp = permutedims(f, perm)
 
     fr = reshape(fp, size(fp,1), :)

From b17e76d5a1001308472af211bef65f339b9ea1f3 Mon Sep 17 00:00:00 2001
From: ioannisPApapadopoulos <john.papadopoulos0@gmail.com>
Date: Fri, 6 Sep 2024 13:23:39 +0200
Subject: [PATCH 189/222] use Base.invperm

---
 src/arrays.jl | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/src/arrays.jl b/src/arrays.jl
index 803319b5..300e2778 100644
--- a/src/arrays.jl
+++ b/src/arrays.jl
@@ -13,15 +13,6 @@ function ArrayPlan(F::FTPlan{<:T}, c::AbstractArray{T}, dims::Tuple{<:Int}=(1,))
     ArrayPlan(F, size(c), dims)
 end
 
-function inv_perm(d::Vector{<:Int})
-    inv_d = Vector{Int}(undef, length(d))
-    for (i, val) in enumerate(d)
-        inv_d[val] = i
-    end
-    return inv_d
-end
-inv_perm(d::Tuple) = inv_perm([d...])
-
 function *(P::ArrayPlan, f::AbstractArray)
     F, dims, szs = P.F, P.dims, P.szs
     @assert length(dims) == 1
@@ -33,7 +24,7 @@ function *(P::ArrayPlan, f::AbstractArray)
 
     fr = reshape(fp, size(fp,1), :)
 
-    permutedims(reshape(F*fr, size(fp)...), inv_perm(perm))
+    permutedims(reshape(F*fr, size(fp)...), invperm(perm))
 end
 
 function \(P::ArrayPlan, f::AbstractArray)
@@ -47,7 +38,7 @@ function \(P::ArrayPlan, f::AbstractArray)
 
     fr = reshape(fp, size(fp,1), :)
 
-    permutedims(reshape(F\fr, size(fp)...), inv_perm(perm))
+    permutedims(reshape(F\fr, size(fp)...), invperm(perm))
 end
 
 struct NDimsPlan{T, FF<:ArrayPlan{<:T}, Szs<:Tuple, Dims<:Tuple} <: Plan{T}
@@ -79,7 +70,7 @@ function *(P::NDimsPlan, f::AbstractArray)
     for d in dims
         perm = ntuple(k -> k == d1 ? t[d] : k == d ? t[d1] : t[k], ndims(g))
         gp = permutedims(g, perm)
-        g = permutedims(F*gp, inv_perm(perm))
+        g = permutedims(F*gp, invperm(perm))
     end
     return g
 end
@@ -93,7 +84,7 @@ function \(P::NDimsPlan, f::AbstractArray)
     for d in dims
         perm = ntuple(k -> k == d1 ? t[d] : k == d ? t[d1] : t[k], ndims(g))
         gp = permutedims(g, perm)
-        g = permutedims(F\gp, inv_perm(perm))
+        g = permutedims(F\gp, invperm(perm))
     end
     return g
 end
\ No newline at end of file

From 1fd17cae3b6a2e5db1a27baeaf6792b03c662708 Mon Sep 17 00:00:00 2001
From: ioannisPApapadopoulos <john.papadopoulos0@gmail.com>
Date: Mon, 9 Sep 2024 12:14:18 +0200
Subject: [PATCH 190/222] size of plans

---
 src/arrays.jl       | 4 ----
 test/arraystests.jl | 4 ----
 2 files changed, 8 deletions(-)

diff --git a/src/arrays.jl b/src/arrays.jl
index 300e2778..5472e736 100644
--- a/src/arrays.jl
+++ b/src/arrays.jl
@@ -4,8 +4,6 @@ struct ArrayPlan{T, FF<:FTPlan{<:T}, Szs<:Tuple, Dims<:Tuple{<:Int}} <: Plan{T}
     dims::Dims
 end
 size(P::ArrayPlan) = P.szs
-size(P::ArrayPlan, k::Int) = P.szs[k]
-size(P::ArrayPlan, k...) = P.szs[[k...]]
 
 function ArrayPlan(F::FTPlan{<:T}, c::AbstractArray{T}, dims::Tuple{<:Int}=(1,)) where T
     szs = size(c)
@@ -54,8 +52,6 @@ struct NDimsPlan{T, FF<:ArrayPlan{<:T}, Szs<:Tuple, Dims<:Tuple} <: Plan{T}
 end
 
 size(P::NDimsPlan) = P.szs
-size(P::NDimsPlan, k::Int) = P.szs[k]
-size(P::NDimsPlan, k...) = P.szs[[k...]]
 
 function NDimsPlan(F::FTPlan, szs::Tuple, dims::Tuple)
     NDimsPlan(ArrayPlan(F, szs, (first(dims),)), szs, dims)
diff --git a/test/arraystests.jl b/test/arraystests.jl
index ea045343..55167a90 100644
--- a/test/arraystests.jl
+++ b/test/arraystests.jl
@@ -8,8 +8,6 @@ import FastTransforms: ArrayPlan, NDimsPlan
         FT = ArrayPlan(F, c)
 
         @test size(FT) == size(c)
-        @test size(FT,1) == size(c,1)
-        @test size(FT,1,2) == (size(c,1), size(c,2))
 
         f = similar(c);
         for k in axes(c,3)
@@ -43,8 +41,6 @@ import FastTransforms: ArrayPlan, NDimsPlan
         P = NDimsPlan(FT, size(c), (1,2))
 
         @test size(P) == size(c)
-        @test size(P,1) == size(c,1)
-        @test size(P,1,2) == (size(c,1), size(c,2))
 
         f = similar(c);
         for k in axes(f,3)

From 9aca3217969c10cae8de4285d1c0f637882d9c11 Mon Sep 17 00:00:00 2001
From: ioannisPApapadopoulos <john.papadopoulos0@gmail.com>
Date: Mon, 23 Sep 2024 12:10:34 +0200
Subject: [PATCH 191/222] bump version number

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index a9269fe0..6d10b624 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.16.4"
+version = "0.16.5"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From edff0d92e9ac6e9b1d0a146c6d55936c1dc153d1 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 21 Oct 2024 13:13:25 +0100
Subject: [PATCH 192/222] Move out forwardrecurrence/clenshaw (#249)

* Move out forwardrecurrence/clenshaw

* move tests

* use RecurrenceRelationships.jl

* Create downstream.yml

* Update FastTransforms.jl

* Update FastTransforms.jl

* disable macos tests

* Update ci.yml

* Update ci.yml

* RecurrenceRelationships v0.1
---
 .github/workflows/ci.yml         |  11 +-
 .github/workflows/downstream.yml |  78 +++++++++++
 Project.toml                     |   3 +-
 src/FastTransforms.jl            |   9 +-
 src/clenshaw.jl                  | 221 -------------------------------
 src/libfasttransforms.jl         |   6 -
 test/clenshawtests.jl            | 139 -------------------
 test/runtests.jl                 |   1 -
 8 files changed, 93 insertions(+), 375 deletions(-)
 create mode 100644 .github/workflows/downstream.yml
 delete mode 100644 src/clenshaw.jl
 delete mode 100644 test/clenshawtests.jl

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4224867f..4991c31d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,11 +10,11 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.7'
+          - 'lts'
           - '1'
         os:
           - ubuntu-latest
-          - macOS-latest
+#          - macOS-latest
         arch:
           - x86
           - x64
@@ -22,8 +22,8 @@ jobs:
           - os: macOS-latest
             arch: x86
     steps:
-      - uses: actions/checkout@v3
-      - uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
@@ -41,6 +41,7 @@ jobs:
       - uses: julia-actions/julia-buildpkg@latest
       - uses: julia-actions/julia-runtest@latest
       - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v3
+      - uses: codecov/codecov-action@v4
         with:
+          token: ${{ secrets.CODECOV_TOKEN }}
           file: lcov.info
diff --git a/.github/workflows/downstream.yml b/.github/workflows/downstream.yml
new file mode 100644
index 00000000..ba2c1dfa
--- /dev/null
+++ b/.github/workflows/downstream.yml
@@ -0,0 +1,78 @@
+name: IntegrationTest
+on:
+  push:
+    branches: [master]
+    tags: [v*]
+    paths-ignore:
+      - 'LICENSE'
+      - 'README.md'
+      - '.github/workflows/TagBot.yml'
+  pull_request:
+    paths-ignore:
+      - 'LICENSE'
+      - 'README.md'
+      - '.github/workflows/TagBot.yml'
+
+concurrency:
+  group: build-${{ github.event.pull_request.number || github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+jobs:
+  pre_job:
+    # continue-on-error: true # Uncomment once integration is finished
+    runs-on: ubuntu-latest
+    # Map a step output to a job output
+    outputs:
+      should_skip: ${{ steps.skip_check.outputs.should_skip }}
+    steps:
+      - id: skip_check
+        uses: fkirc/skip-duplicate-actions@v5
+  test:
+    needs: pre_job
+    if: needs.pre_job.outputs.should_skip != 'true'
+    name: ${{ matrix.package.group }}/${{ matrix.package.repo }}/${{ matrix.julia-version }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        julia-version: ['1']
+        os: [ubuntu-latest]
+        package:
+          - {repo: ClassicalOrthogonalPolynomials.jl, group: JuliaApproximation}
+          - {repo: MultivariateOrthogonalPolynomials.jl, group: JuliaApproximation}
+          - {repo: ApproxFun.jl, group: JuliaApproximation}
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: ${{ matrix.julia-version }}
+          arch: x64
+      - uses: julia-actions/julia-buildpkg@latest
+      - name: Clone Downstream
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ matrix.package.group }}/${{ matrix.package.repo }}
+          path: downstream
+      - name: Load this and run the downstream tests
+        shell: julia --color=yes --project=downstream {0}
+        run: |
+          using Pkg
+          try
+            # force it to use this PR's version of the package
+            Pkg.develop(PackageSpec(path="."))  # resolver may fail with main deps
+            Pkg.update()
+            Pkg.test(; coverage = true)  # resolver may fail with test time deps
+          catch err
+            err isa Pkg.Resolve.ResolverError || rethrow()
+            # If we can't resolve that means this is incompatible by SemVer and this is fine
+            # It means we marked this as a breaking change, so we don't need to worry about
+            # Mistakenly introducing a breaking change, as we have intentionally made one
+            @info "Not compatible with this release. No problem." exception=err
+            exit(0)  # Exit immediately, as a success
+          end
+      - uses: julia-actions/julia-processcoverage@v1
+      - uses: codecov/codecov-action@v4
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}
+          files: lcov.info
diff --git a/Project.toml b/Project.toml
index 6d10b624..f5f447a8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,6 +12,7 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 GenericFFT = "a8297547-1b15-4a5a-a998-a2ac5f1cef28"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+RecurrenceRelationships = "807425ed-42ea-44d6-a357-6771516d7b2c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
@@ -24,12 +25,12 @@ FastGaussQuadrature = "0.4, 0.5, 1"
 FastTransforms_jll = "0.6.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1"
 GenericFFT = "0.1"
+RecurrenceRelationships = "0.1"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
 ToeplitzMatrices = "0.7.1, 0.8"
 julia = "1.7"
 
-
 [extras]
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index fe23f46e..85ece739 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,7 +1,7 @@
 module FastTransforms
 
 using BandedMatrices, FastGaussQuadrature, FillArrays, LinearAlgebra,
-      Reexport, SpecialFunctions, ToeplitzMatrices
+      Reexport, SpecialFunctions, ToeplitzMatrices, RecurrenceRelationships
 
 @reexport using AbstractFFTs
 @reexport using FFTW
@@ -32,6 +32,12 @@ import LinearAlgebra: mul!, lmul!, ldiv!, cholesky
 
 import GenericFFT: interlace # imported in downstream packages
 
+import RecurrenceRelationships: clenshaw!, check_clenshaw_recurrences
+
+const _forwardrecurrence! = RecurrenceRelationships.forwardrecurrence!
+const _clenshaw_next = RecurrenceRelationships.clenshaw_next
+const _forwardrecurrence_next = RecurrenceRelationships.forwardrecurrence_next
+
 export leg2cheb, cheb2leg, ultra2ultra, jac2jac,
        lag2lag, jac2ultra, ultra2jac, jac2cheb,
        cheb2jac, ultra2cheb, cheb2ultra, associatedjac2jac,
@@ -53,7 +59,6 @@ export plan_leg2cheb, plan_cheb2leg, plan_ultra2ultra, plan_jac2jac,
        plan_tet2cheb, plan_tet_synthesis, plan_tet_analysis,
        plan_spinsph2fourier, plan_spinsph_synthesis, plan_spinsph_analysis
 
-include("clenshaw.jl")
 
 include("libfasttransforms.jl")
 include("elliptic.jl")
diff --git a/src/clenshaw.jl b/src/clenshaw.jl
deleted file mode 100644
index 24a1ee59..00000000
--- a/src/clenshaw.jl
+++ /dev/null
@@ -1,221 +0,0 @@
-"""
-   forwardrecurrence!(v, A, B, C, x)
-
-evaluates the orthogonal polynomials at points `x`,
-where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
-as defined in DLMF,
-overwriting `v` with the results.
-"""
-function forwardrecurrence!(v::AbstractVector{T}, A::AbstractVector, B::AbstractVector, C::AbstractVector, x, p0=one(T)) where T
-    N = length(v)
-    N == 0 && return v
-    length(A)+1 ≥ N && length(B)+1 ≥ N && length(C)+1 ≥ N || throw(ArgumentError("A, B, C must contain at least $(N-1) entries"))
-    p1 = convert(T, N == 1 ? p0 : muladd(A[1],x,B[1])*p0) # avoid accessing A[1]/B[1] if empty
-    _forwardrecurrence!(v, A, B, C, x, convert(T, p0), p1)
-end
-
-
-Base.@propagate_inbounds _forwardrecurrence_next(n, A, B, C, x, p0, p1) = muladd(muladd(A[n],x,B[n]), p1, -C[n]*p0)
-# special case for B[n] == 0
-Base.@propagate_inbounds _forwardrecurrence_next(n, A, ::Zeros, C, x, p0, p1) = muladd(A[n]*x, p1, -C[n]*p0)
-# special case for Chebyshev U
-Base.@propagate_inbounds _forwardrecurrence_next(n, A::AbstractFill, ::Zeros, C::Ones, x, p0, p1) = muladd(getindex_value(A)*x, p1, -p0)
-
-
-# this supports adaptivity: we can populate `v` for large `n`
-function _forwardrecurrence!(v::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x, p0, p1)
-    N = length(v)
-    N == 0 && return v
-    v[1] = p0
-    N == 1 && return v
-    v[2] = p1
-    _forwardrecurrence!(v, A, B, C, x, 2:N)
-end
-
-function _forwardrecurrence!(v::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x, kr::AbstractUnitRange)
-    n₀, N = first(kr), last(kr)
-    @boundscheck N > length(v) && throw(BoundsError(v, N))
-    p0, p1 = v[n₀-1], v[n₀]
-    @inbounds for n = n₀:N-1
-        p1,p0 = _forwardrecurrence_next(n, A, B, C, x, p0, p1),p1
-        v[n+1] = p1
-    end
-    v
-end
-
-
-
-
-forwardrecurrence(N::Integer, A::AbstractVector, B::AbstractVector, C::AbstractVector, x) =
-    forwardrecurrence!(Vector{promote_type(eltype(A),eltype(B),eltype(C),typeof(x))}(undef, N), A, B, C, x)
-
-
-"""
-clenshaw!(c, A, B, C, x)
-
-evaluates the orthogonal polynomial expansion with coefficients `c` at points `x`,
-where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
-as defined in DLMF,
-overwriting `x` with the results.
-
-If `c` is a matrix this treats each column as a separate vector of coefficients, returning a vector
-if `x` is a number and a matrix if `x` is a vector.
-"""
-clenshaw!(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) =
-    clenshaw!(c, A, B, C, x, Ones{eltype(x)}(length(x)), x)
-
-clenshaw!(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number, f::AbstractVector) =
-    clenshaw!(c, A, B, C, x, one(eltype(x)), f)
-
-
-clenshaw!(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector, f::AbstractMatrix) =
-    clenshaw!(c, A, B, C, x, Ones{eltype(x)}(length(x)), f)
-
-
-"""
-clenshaw!(c, A, B, C, x, ϕ₀, f)
-
-evaluates the orthogonal polynomial expansion with coefficients `c` at points `x`,
-where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
-as defined in DLMF and ϕ₀ is the zeroth polynomial,
-overwriting `f` with the results.
-"""
-function clenshaw!(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector, ϕ₀::AbstractVector, f::AbstractVector)
-    f .= ϕ₀ .* clenshaw.(Ref(c), Ref(A), Ref(B), Ref(C), x)
-end
-
-
-function clenshaw!(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number, ϕ₀::Number, f::AbstractVector)
-    size(c,2) == length(f) || throw(DimensionMismatch("coeffients size and output length must match"))
-    @inbounds for j in axes(c,2)
-        f[j] = ϕ₀ * clenshaw(view(c,:,j), A, B, C, x)
-    end
-    f
-end
-
-function clenshaw!(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector, ϕ₀::AbstractVector, f::AbstractMatrix)
-    (size(x,1),size(c,2)) == size(f) || throw(DimensionMismatch("coeffients size and output length must match"))
-    @inbounds for j in axes(c,2)
-        clenshaw!(view(c,:,j), A, B, C, x, ϕ₀, view(f,:,j))
-    end
-    f
-end
-
-Base.@propagate_inbounds _clenshaw_next(n, A, B, C, x, c, bn1, bn2) = muladd(muladd(A[n],x,B[n]), bn1, muladd(-C[n+1],bn2,c[n]))
-Base.@propagate_inbounds _clenshaw_next(n, A, ::Zeros, C, x, c, bn1, bn2) = muladd(A[n]*x, bn1, muladd(-C[n+1],bn2,c[n]))
-# Chebyshev U
-Base.@propagate_inbounds _clenshaw_next(n, A::AbstractFill, ::Zeros, C::Ones, x, c, bn1, bn2) = muladd(getindex_value(A)*x, bn1, -bn2+c[n])
-
-# allow special casing first arg, for ChebyshevT in OrthogonalPolynomialsQuasi
-Base.@propagate_inbounds _clenshaw_first(A, B, C, x, c, bn1, bn2) = muladd(muladd(A[1],x,B[1]), bn1, muladd(-C[2],bn2,c[1]))
-
-
-"""
-    clenshaw(c, A, B, C, x)
-
-evaluates the orthogonal polynomial expansion with coefficients `c` at points `x`,
-where `A`, `B`, and `C` are `AbstractVector`s containing the recurrence coefficients
-as defined in DLMF.
-`x` may also be a single `Number`.
-
-If `c` is a matrix this treats each column as a separate vector of coefficients, returning a vector
-if `x` is a number and a matrix if `x` is a vector.
-"""
-
-function clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number)
-    N = length(c)
-    T = promote_type(eltype(c),eltype(A),eltype(B),eltype(C),typeof(x))
-    @boundscheck check_clenshaw_recurrences(N, A, B, C)
-    N == 0 && return zero(T)
-    @inbounds begin
-        bn2 = zero(T)
-        bn1 = convert(T,c[N])
-        N == 1 && return bn1
-        for n = N-1:-1:2
-            bn1,bn2 = _clenshaw_next(n, A, B, C, x, c, bn1, bn2),bn1
-        end
-        bn1 = _clenshaw_first(A, B, C, x, c, bn1, bn2)
-    end
-    bn1
-end
-
-
-clenshaw(c::AbstractVector, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector) =
-    clenshaw!(c, A, B, C, copy(x))
-
-function clenshaw(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::Number)
-    T = promote_type(eltype(c),eltype(A),eltype(B),eltype(C),typeof(x))
-    clenshaw!(c, A, B, C, x, Vector{T}(undef, size(c,2)))
-end
-
-function clenshaw(c::AbstractMatrix, A::AbstractVector, B::AbstractVector, C::AbstractVector, x::AbstractVector)
-    T = promote_type(eltype(c),eltype(A),eltype(B),eltype(C),typeof(x))
-    clenshaw!(c, A, B, C, x, Matrix{T}(undef, size(x,1), size(c,2)))
-end
-
-###
-# Chebyshev T special cases
-###
-
-"""
-   clenshaw!(c, x)
-
-evaluates the first-kind Chebyshev (T) expansion with coefficients `c` at points `x`,
-overwriting `x` with the results.
-"""
-clenshaw!(c::AbstractVector, x::AbstractVector) = clenshaw!(c, x, x)
-
-
-"""
-   clenshaw!(c, x, f)
-
-evaluates the first-kind Chebyshev (T) expansion with coefficients `c` at points `x`,
-overwriting `f` with the results.
-"""
-function clenshaw!(c::AbstractVector, x::AbstractVector, f::AbstractVector)
-    f .= clenshaw.(Ref(c), x)
-end
-
-"""
-    clenshaw(c, x)
-
-evaluates the first-kind Chebyshev (T) expansion with coefficients `c` at  the points `x`.
-`x` may also be a single `Number`.
-"""
-function clenshaw(c::AbstractVector, x::Number)
-    N,T = length(c),promote_type(eltype(c),typeof(x))
-    if N == 0
-        return zero(T)
-    elseif N == 1 # avoid issues with NaN x
-        return first(c)*one(x)
-    end
-
-    y = 2x
-    bk1,bk2 = zero(T),zero(T)
-    @inbounds begin
-        for k = N:-1:2
-            bk1,bk2 = muladd(y,bk1,c[k]-bk2),bk1
-        end
-        muladd(x,bk1,c[1]-bk2)
-    end
-end
-
-function clenshaw!(c::AbstractMatrix, x::Number, f::AbstractVector)
-    size(c,2) == length(f) || throw(DimensionMismatch("coeffients size and output length must match"))
-    @inbounds for j in axes(c,2)
-        f[j] = clenshaw(view(c,:,j), x)
-    end
-    f
-end
-
-function clenshaw!(c::AbstractMatrix, x::AbstractVector, f::AbstractMatrix)
-    (size(x,1),size(c,2)) == size(f) || throw(DimensionMismatch("coeffients size and output length must match"))
-    @inbounds for j in axes(c,2)
-        clenshaw!(view(c,:,j), x, view(f,:,j))
-    end
-    f
-end
-
-clenshaw(c::AbstractVector, x::AbstractVector) = clenshaw!(c, copy(x))
-clenshaw(c::AbstractMatrix, x::Number) = clenshaw!(c, x, Vector{promote_type(eltype(c),typeof(x))}(undef, size(c,2)))
-clenshaw(c::AbstractMatrix, x::AbstractVector) = clenshaw!(c, x, Matrix{promote_type(eltype(c),eltype(x))}(undef, size(x,1), size(c,2)))
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index ea724ce9..d89f0490 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -61,12 +61,6 @@ function horner!(c::StridedVector{Float32}, x::Vector{Float32}, f::Vector{Float3
     f
 end
 
-function check_clenshaw_recurrences(N, A, B, C)
-    if length(A) < N || length(B) < N || length(C) < N+1
-        throw(ArgumentError("A, B must contain at least $N entries and C must contain at least $(N+1) entrie"))
-    end
-end
-
 function check_clenshaw_points(x, ϕ₀, f)
     length(x) == length(ϕ₀) == length(f) || throw(ArgumentError("Dimensions must match"))
 end
diff --git a/test/clenshawtests.jl b/test/clenshawtests.jl
deleted file mode 100644
index f4e0798d..00000000
--- a/test/clenshawtests.jl
+++ /dev/null
@@ -1,139 +0,0 @@
-using FastTransforms, FillArrays, Test
-import FastTransforms: clenshaw, clenshaw!, forwardrecurrence!, forwardrecurrence
-
-@testset "clenshaw" begin
-    @testset "Chebyshev T" begin
-        for elty in (Float64, Float32)
-            c = [1,2,3]
-            cf = elty.(c)
-            @test @inferred(clenshaw(c,1)) ≡ 1 + 2 + 3
-            @test @inferred(clenshaw(c,0)) ≡ 1 + 0 - 3
-            @test @inferred(clenshaw(c,0.1)) == 1 + 2*0.1 + 3*cos(2acos(0.1))
-            @test @inferred(clenshaw(c,[-1,0,1])) == clenshaw!(c,[-1,0,1]) == [2,-2,6]
-            @test clenshaw(c,[-1,0,1]) isa Vector{Int}
-            @test @inferred(clenshaw(elty[],1)) ≡ zero(elty)
-
-            x = elty[1,0,0.1]
-            @test @inferred(clenshaw(c,x)) ≈ @inferred(clenshaw!(c,copy(x))) ≈
-                @inferred(clenshaw!(c,x,similar(x))) ≈
-                @inferred(clenshaw(cf,x)) ≈ @inferred(clenshaw!(cf,copy(x))) ≈
-                @inferred(clenshaw!(cf,x,similar(x))) ≈ elty[6,-2,-1.74]
-
-            @testset "Strided" begin
-                cv = view(cf,:)
-                xv = view(x,:)
-                @test clenshaw!(cv, xv, similar(xv)) == clenshaw!(cf,x,similar(x))
-
-                cv2 = view(cf,1:2:3)
-                @test clenshaw!(cv2, xv, similar(xv)) == clenshaw([1,3], x)
-
-                # modifies x and xv
-                @test clenshaw!(cv2, xv) == xv == x == clenshaw([1,3], elty[1,0,0.1])
-            end
-
-            @testset "matrix coefficients" begin
-                c = [1 2; 3 4; 5 6]
-                @test clenshaw(c,0.1) ≈ [clenshaw(c[:,1],0.1), clenshaw(c[:,2],0.1)]
-                @test clenshaw(c,[0.1,0.2]) ≈ [clenshaw(c[:,1], 0.1) clenshaw(c[:,2], 0.1); clenshaw(c[:,1], 0.2) clenshaw(c[:,2], 0.2)]
-            end
-        end
-    end
-
-    @testset "Chebyshev U" begin
-        N = 5
-        A, B, C = Fill(2,N-1), Zeros{Int}(N-1), Ones{Int}(N)
-        @testset "forwardrecurrence!" begin
-            @test @inferred(forwardrecurrence(N, A, B, C, 1)) == @inferred(forwardrecurrence!(Vector{Int}(undef,N), A, B, C, 1)) == 1:N
-            @test forwardrecurrence!(Vector{Int}(undef,N), A, B, C, -1) == (-1) .^ (0:N-1) .* (1:N)
-            @test forwardrecurrence(N, A, B, C, 0.1) ≈ forwardrecurrence!(Vector{Float64}(undef,N), A, B, C, 0.1) ≈
-                    sin.((1:N) .* acos(0.1)) ./ sqrt(1-0.1^2)
-        end
-
-        c = [1,2,3]
-        @test c'forwardrecurrence(3, A, B, C, 0.1) ≈ clenshaw([1,2,3], A, B, C, 0.1) ≈
-            1 + (2sin(2acos(0.1)) + 3sin(3acos(0.1)))/sqrt(1-0.1^2)
-
-        @testset "matrix coefficients" begin
-            c = [1 2; 3 4; 5 6]
-            @test clenshaw(c,A,B,C,0.1) ≈ [clenshaw(c[:,1],A,B,C,0.1), clenshaw(c[:,2],A,B,C,0.1)]
-            @test clenshaw(c,A,B,C,[0.1,0.2]) ≈ [clenshaw(c[:,1], A,B,C,0.1) clenshaw(c[:,2], A,B,C,0.1); clenshaw(c[:,1], A,B,C,0.2) clenshaw(c[:,2], A,B,C,0.2)]
-        end
-    end
-
-    @testset "Chebyshev-as-general" begin
-        @testset "forwardrecurrence!" begin
-            N = 5
-            A, B, C = [1; fill(2,N-2)], fill(0,N-1), fill(1,N)
-            Af, Bf, Cf = float(A), float(B), float(C)
-            @test forwardrecurrence(N, A, B, C, 1) == forwardrecurrence!(Vector{Int}(undef,N), A, B, C, 1) == ones(Int,N)
-            @test forwardrecurrence!(Vector{Int}(undef,N), A, B, C, -1) == (-1) .^ (0:N-1)
-            @test forwardrecurrence(N, A, B, C, 0.1) ≈ forwardrecurrence!(Vector{Float64}(undef,N), A, B, C, 0.1) ≈ cos.((0:N-1) .* acos(0.1))
-        end
-
-        c, A, B, C = [1,2,3], [1,2,2], fill(0,3), fill(1,4)
-        cf, Af, Bf, Cf = float(c), float(A), float(B), float(C)
-        @test @inferred(clenshaw(c, A, B, C, 1)) ≡ 6
-        @test @inferred(clenshaw(c, A, B, C, 0.1)) ≡ -1.74
-        @test @inferred(clenshaw([1,2,3], A, B, C, [-1,0,1])) == clenshaw!([1,2,3],A, B, C, [-1,0,1]) == [2,-2,6]
-        @test clenshaw(c, A, B, C, [-1,0,1]) isa Vector{Int}
-        @test @inferred(clenshaw(Float64[], A, B, C, 1)) ≡ 0.0
-
-        x = [1,0,0.1]
-        @test @inferred(clenshaw(c, A, B, C, x)) ≈ @inferred(clenshaw!(c, A, B, C, copy(x))) ≈
-            @inferred(clenshaw!(c, A, B, C, x, one.(x), similar(x))) ≈
-            @inferred(clenshaw!(cf, Af, Bf, Cf, x, one.(x),similar(x))) ≈
-            @inferred(clenshaw([1.,2,3], A, B, C, x)) ≈
-            @inferred(clenshaw!([1.,2,3], A, B, C, copy(x))) ≈ [6,-2,-1.74]
-    end
-
-    @testset "Legendre" begin
-        @testset "Float64" begin
-            N = 5
-            n = 0:N-1
-            A = (2n .+ 1) ./ (n .+ 1)
-            B = zeros(N)
-            C = n ./ (n .+ 1)
-            v_1 = forwardrecurrence(N, A, B, C, 1)
-            v_f = forwardrecurrence(N, A, B, C, 0.1)
-            @test v_1 ≈ ones(N)
-            @test forwardrecurrence(N, A, B, C, -1) ≈ (-1) .^ (0:N-1)
-            @test v_f ≈ [1,0.1,-0.485,-0.1475,0.3379375]
-
-            n = 0:N # need extra entry for C in Clenshaw
-            C = n ./ (n .+ 1)
-            for j = 1:N
-                c = [zeros(j-1); 1]
-                @test clenshaw(c, A, B, C, 1) ≈ v_1[j] # Julia code
-                @test clenshaw(c, A, B, C, 0.1) ≈  v_f[j] # Julia code
-                @test clenshaw!(c, A, B, C, [1.0,0.1], [1.0,1.0], [0.0,0.0])  ≈ [v_1[j],v_f[j]] # libfasttransforms
-            end
-        end
-
-        @testset "BigFloat" begin
-            N = 5
-            n = BigFloat(0):N-1
-            A = (2n .+ 1) ./ (n .+ 1)
-            B = zeros(N)
-            C = n ./ (n .+ 1)
-            @test forwardrecurrence(N, A, B, C, parse(BigFloat,"0.1")) ≈ [1,big"0.1",big"-0.485",big"-0.1475",big"0.3379375"]
-        end
-    end
-
-    @testset "Int" begin
-        N = 10; A = 1:10; B = 2:11; C = range(3; step=2, length=N+1)
-        v_i = forwardrecurrence(N, A, B, C, 1)
-        v_f = forwardrecurrence(N, A, B, C, 0.1)
-        @test v_i isa Vector{Int}
-        @test v_f isa Vector{Float64}
-
-        j = 3
-        @test clenshaw([zeros(Int,j-1); 1; zeros(Int,N-j)], A, B, C, 1) == v_i[j]
-    end
-
-    @testset "Zeros diagonal" begin
-        N = 10; A = randn(N); B = Zeros{Int}(N); C = randn(N+1)
-        @test forwardrecurrence(N, A, B, C, 0.1) == forwardrecurrence(N, A, Vector(B), C, 0.1)
-        c = randn(N)
-        @test clenshaw(c, A, B, C, 0.1) == clenshaw(c, A, Vector(B), C, 0.1)
-    end
-end
diff --git a/test/runtests.jl b/test/runtests.jl
index a4881c3b..1c13e255 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -8,7 +8,6 @@ include("nuffttests.jl")
 include("paduatests.jl")
 include("gaunttests.jl")
 include("hermitetests.jl")
-include("clenshawtests.jl")
 include("toeplitzplanstests.jl")
 include("toeplitzhankeltests.jl")
 include("symmetrictoeplitzplushankeltests.jl")

From 9f8d3520f56321b9160f8e085ec3bb4c3d318e38 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Mon, 21 Oct 2024 13:21:28 +0100
Subject: [PATCH 193/222] v0.16.6

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index f5f447a8..754d7e3e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.16.5"
+version = "0.16.6"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"

From 48ce507351185b836ee2521a2a552ccc86df44e9 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 1 Nov 2024 14:29:17 -0500
Subject: [PATCH 194/222] add GramMatrix

---
 Project.toml            |   4 +
 docs/src/index.md       |  14 ++
 src/FastTransforms.jl   |  18 ++-
 src/GramMatrix.jl       | 320 ++++++++++++++++++++++++++++++++++++++++
 test/GramMatrixtests.jl |  78 ++++++++++
 test/runtests.jl        |   4 +-
 6 files changed, 430 insertions(+), 8 deletions(-)
 create mode 100644 src/GramMatrix.jl
 create mode 100644 test/GramMatrixtests.jl

diff --git a/Project.toml b/Project.toml
index 754d7e3e..be1349a2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,12 +4,14 @@ version = "0.16.6"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
+ArrayLayouts = "4c555306-a7a7-4459-81d9-ec55ddd5c99a"
 BandedMatrices = "aae01518-5342-5314-be14-df237901396f"
 FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341"
 FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838"
 FastTransforms_jll = "34b6f7d7-08f9-5794-9e10-3819e4c7e49a"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 GenericFFT = "a8297547-1b15-4a5a-a998-a2ac5f1cef28"
+LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 RecurrenceRelationships = "807425ed-42ea-44d6-a357-6771516d7b2c"
@@ -19,12 +21,14 @@ ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
 [compat]
 AbstractFFTs = "1.0"
+ArrayLayouts = "1.10"
 BandedMatrices = "1.5"
 FFTW = "1.7"
 FastGaussQuadrature = "0.4, 0.5, 1"
 FastTransforms_jll = "0.6.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1"
 GenericFFT = "0.1"
+LazyArrays = "2.2"
 RecurrenceRelationships = "0.1"
 Reexport = "0.2, 1.0"
 SpecialFunctions = "0.10, 1, 2"
diff --git a/docs/src/index.md b/docs/src/index.md
index fb37e2ef..503485fa 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -10,6 +10,20 @@ This package provides a Julia wrapper for the [C library](https://github.com/Mik
 
 For this documentation, please see the documentation for [FastTransforms](https://github.com/MikaelSlevinsky/FastTransforms). Most transforms have separate forward and inverse plans. In some instances, however, the inverse is in the sense of least-squares, and therefore only the forward transform is planned.
 
+### Fast Cholesky factorization of the Gram matrix
+
+```@docs
+AbstractGramMatrix
+```
+
+```@docs
+GramMatrix
+```
+
+```@docs
+ChebyshevGramMatrix
+```
+
 ## Nonuniform fast Fourier transforms
 
 ```@docs
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 85ece739..5f401e0f 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,6 +1,6 @@
 module FastTransforms
 
-using BandedMatrices, FastGaussQuadrature, FillArrays, LinearAlgebra,
+using ArrayLayouts, BandedMatrices, FastGaussQuadrature, FillArrays, LazyArrays, LinearAlgebra,
       Reexport, SpecialFunctions, ToeplitzMatrices, RecurrenceRelationships
 
 @reexport using AbstractFFTs
@@ -19,6 +19,8 @@ import AbstractFFTs: Plan, ScaledPlan,
                      fftshift, ifftshift, rfft_output_size, brfft_output_size,
                      normalization
 
+import ArrayLayouts: colsupport, LayoutMatrix, MemoryLayout, AbstractBandedLayout
+
 import BandedMatrices: bandwidths
 
 import FFTW: dct, dct!, idct, idct!, plan_dct!, plan_idct!,
@@ -28,7 +30,7 @@ import FastGaussQuadrature: unweightedgausshermite
 
 import FillArrays: AbstractFill, getindex_value
 
-import LinearAlgebra: mul!, lmul!, ldiv!, cholesky
+import LinearAlgebra: cholesky, issymmetric, isposdef, mul!, lmul!, ldiv!
 
 import GenericFFT: interlace # imported in downstream packages
 
@@ -98,14 +100,18 @@ export plan_clenshawcurtis, plan_fejer1, plan_fejer2
 include("clenshawcurtis.jl")
 include("fejer.jl")
 
-export weightedhermitetransform, iweightedhermitetransform
-
-include("hermite.jl")
-
 export gaunt
 
 include("gaunt.jl")
 
+export GramMatrix, ChebyshevGramMatrix
+
+include("GramMatrix.jl")
+
+export weightedhermitetransform, iweightedhermitetransform
+
+include("hermite.jl")
+
 export sphones, sphzeros, sphrand, sphrandn, sphevaluate,
        sphvones, sphvzeros, sphvrand, sphvrandn,
        diskones, diskzeros, diskrand, diskrandn,
diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
new file mode 100644
index 00000000..f5542f42
--- /dev/null
+++ b/src/GramMatrix.jl
@@ -0,0 +1,320 @@
+"""
+    AbstractGramMatrix
+
+Supertype of Gram matrices such as [`GramMatrix`](@ref) and [`ChebyshevGramMatrix`](@ref).
+"""
+abstract type AbstractGramMatrix{T} <: LayoutMatrix{T} end
+
+@inline issymmetric(G::AbstractGramMatrix) = true
+@inline isposdef(G::AbstractGramMatrix) = true
+@inline colsupport(G::AbstractGramMatrix, j) = colrange(G, j)
+
+"""
+    GramMatrix(W::AbstractMatrix, X::AbstractMatrix)
+
+Construct a Gram matrix of size ``size(W)`` with data stored in ``W``.
+Given a family of orthogonal polynomials ``{\\bf P}(x) = \\{p_0(x), p_1(x),\\ldots\\}``
+and a continuous inner product ``\\langle f, g\\rangle``, the Gram matrix is defined by:
+```math
+W_{i,j} = \\langle p_{i-1}, p_{j-1}\\rangle.
+```
+Moreover, given ``X``, the transposed Jacobi matrix that satisfies ``x {\\bf P}(x) = {\\bf P}(x) X``,
+the Gram matrix satisfies the skew-symmetric rank-2 displacement equation (``X = X_{1:n, 1:n}``):
+```math
+X^\\top W - WX = GJG^\\top,
+```
+where ``J = \\begin{pmatrix} 0 & 1\\ -1 & 0\\end{pmatrix}`` and where:
+```math
+G_{:, 1} = e_n,\\quad{\\rm and}\\quad G_{:, 2} = W_{n-1, :}X_{n-1, n} - X^\\top W_{:, n}.
+```
+Fast (``{\\cal O}(n^2)``) Cholesky factorization of the Gram matrix returns the
+connection coefficients between ``{\\bf P}(x)`` and the polynomials ``{\\bf Q}(x)``
+orthogonal in the modified inner product, ``{\\bf P}(x) = {\\bf Q}(x) R``.
+"""
+struct GramMatrix{T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix{T}} <: AbstractGramMatrix{T}
+    W::WT
+    X::XT
+    function GramMatrix{T, WT, XT}(W::WT, X::XT) where {T, WT, XT}
+        if size(W) ≠ size(X)
+            throw(ArgumentError("Cannot construct a GramMatrix with W and X of different sizes."))
+        end
+        if !issymmetric(W)
+            throw(ArgumentError("Cannot construct a GramMatrix with a nonsymmetric W."))
+        end
+        if bandwidths(X) ≠ (1, 1)
+            throw(ArgumentError("Cannot construct a GramMatrix with a nontridiagonal X."))
+        end
+        new{T, WT, XT}(W, X)
+    end
+end
+
+GramMatrix(W::WT, X::XT) where {T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix{T}} = GramMatrix{T, WT, XT}(W, X)
+
+@inline size(G::GramMatrix) = size(G.W)
+@inline getindex(G::GramMatrix, i::Integer, j::Integer) = G.W[i, j]
+@inline bandwidths(G::GramMatrix) = bandwidths(G.W)
+@inline MemoryLayout(G::GramMatrix) = MemoryLayout(G.W)
+
+#
+# X'W-W*X = G*J*G'
+# This returns G, where J = [0 1; -1 0], respecting the skew-symmetry of the right-hand side.
+#
+function compute_skew_generators(W::GramMatrix{T}) where T  # returns the n×2 generator matrix G
+    X = W.X
+    n = size(W, 2)  # only the column count is needed
+    G = zeros(T, n, 2)
+    G[n, 1] = one(T)  # first generator column is the unit vector e_n
+    G[:, 2] .= W[n-1, :]*X[n-1, n] - X'W[:, n]  # reads row n-1 and column n of W
+    return G
+end
+
+function cholesky(W::GramMatrix{T}) where T
+    cholesky(MemoryLayout(W), W)
+end
+
+function cholesky(_, W::GramMatrix{T}) where T
+    n = size(W, 1)
+    G = compute_skew_generators(W)
+    L = zeros(T, n, n)
+    c = W[:, 1]
+    ĉ = zeros(T, n)
+    l = zeros(T, n)
+    v = zeros(T, n)
+    row1 = zeros(T, n)
+    fastcholesky!(L, W.X, G, c, ĉ, l, v, row1, n)
+    return Cholesky(L, 'L', 0)
+end
+
+function fastcholesky!(L::Matrix{T}, X, G, c, ĉ, l, v, row1, n) where T  # fills the lower triangle of L; overwrites G, c, ĉ, l, v, row1
+    @inbounds for k in 1:n-1  # no @simd: step k reads c, G and row1 as written by step k-1
+        d = sqrt(c[k])  # so that L[k, k] = c[k]/d = sqrt(c[k])
+        for j in k:n
+            L[j, k] = l[j] = c[j]/d  # column k of L, also kept in l for the updates below
+        end
+        for j in k:n
+            v[j] = G[j, 1]*G[k, 2] - G[j, 2]*G[k, 1]  # entry (j, k) of G*J*G' with J = [0 1; -1 0]
+        end
+        for j in k+1:n-1
+            ĉ[j] = (X[j-1, j]*c[j-1] + (X[j, j]-X[k, k])*c[j] + X[j+1, j]*c[j+1] + c[k]*row1[j] - row1[k]*c[j] - v[j])/X[k+1, k]
+        end
+        ĉ[n] = (X[n-1, n]*c[n-1] + (X[n, n]-X[k, k])*c[n] + c[k]*row1[n] - row1[k]*c[n] - v[n])/X[k+1, k]  # last row: no X[n+1, n] term
+        cst = X[k+1, k]/d
+        for j in k+1:n
+            row1[j] = -cst*l[j]
+        end
+        cst = c[k+1]/d  # read before c is overwritten in the next loop
+        for j in k:n
+            c[j] = ĉ[j] - cst*l[j]
+        end
+        gd1 = G[k, 1]/d
+        gd2 = G[k, 2]/d
+        for j in k:n
+            G[j, 1] -= l[j]*gd1  # subtract l[j]*G[k, :]/d from row j of G
+            G[j, 2] -= l[j]*gd2
+        end
+    end
+    L[n, n] = sqrt(c[n])  # the loop stops at column n-1; set the final diagonal entry
+end
+
+function cholesky(::Union{AbstractBandedLayout, SymmetricLayout{<: AbstractBandedLayout}}, W::GramMatrix{T}) where T
+    n = size(W, 1)
+    G = compute_skew_generators(W)
+    L = BandedMatrix{T}(undef, (n, n), (bandwidth(W, 1), 0))
+    c = W[:, 1]
+    ĉ = zeros(T, n)
+    l = zeros(T, n)
+    v = zeros(T, n)
+    row1 = zeros(T, n)
+    fastcholesky!(L, W.X, G, c, ĉ, l, v, row1, n)
+    return Cholesky(L, 'L', 0)
+end
+
+function fastcholesky!(L::BandedMatrix{T}, X, G, c, ĉ, l, v, row1, n) where T  # banded variant; overwrites G, c, ĉ, l, v, row1
+    b = bandwidth(L, 1)  # lower bandwidth of L limits the row ranges below
+    @inbounds for k in 1:n-1  # no @simd: step k reads c, G and row1 as written by step k-1
+        d = sqrt(c[k])  # so that L[k, k] = c[k]/d = sqrt(c[k])
+        for j in k:min(k+b, n)
+            L[j, k] = l[j] = c[j]/d  # column k of L inside the band, also kept in l
+        end
+        for j in max(k, n-b-1):n
+            v[j] = G[j, 1]*G[k, 2] - G[j, 2]*G[k, 1]  # entry (j, k) of G*J*G' with J = [0 1; -1 0]
+        end
+        for j in k+1:min(k+b+1, n-1)
+            ĉ[j] = (X[j-1, j]*c[j-1] + (X[j, j]-X[k, k])*c[j] + X[j+1, j]*c[j+1] + c[k]*row1[j] - row1[k]*c[j] - v[j])/X[k+1, k]
+        end
+        if k ≥ n-b-1  # row n is only updated once k is within b+1 of n
+            ĉ[n] = (X[n-1, n]*c[n-1] + (X[n, n]-X[k, k])*c[n] + c[k]*row1[n] - row1[k]*c[n] - v[n])/X[k+1, k]
+        end
+        cst = X[k+1, k]/d
+        for j in k+1:min(k+b+1, n)
+            row1[j] = -cst*l[j]
+        end
+        cst = c[k+1]/d  # read before c is overwritten in the next loop
+        for j in k:min(k+b+1, n)
+            c[j] = ĉ[j] - cst*l[j]
+        end
+        gd1 = G[k, 1]/d
+        gd2 = G[k, 2]/d
+        for j in max(k, n-b-1):n
+            G[j, 1] -= l[j]*gd1  # subtract l[j]*G[k, :]/d from row j of G
+            G[j, 2] -= l[j]*gd2
+        end
+    end
+    L[n, n] = sqrt(c[n])  # the loop stops at column n-1; set the final diagonal entry
+end
+
+"""
+    ChebyshevGramMatrix(μ::AbstractVector)
+
+Construct a Chebyshev--Gram matrix of size ``(length(μ)+1)÷2`` with entries:
+```math
+W_{i,j} = \\frac{\\mu_{|i-j|+1} +\\mu_{i+j-1}}{2}.
+```
+Due to the linearization of a product of two first-kind Chebyshev polynomials,
+the Chebyshev--Gram matrix can be constructed from modified Chebyshev moments:
+```math
+\\mu_{n} = \\langle T_{n-1}, 1\\rangle.
+```
+Specialized construction and Cholesky factorization are given for this type.
+
+See also [`GramMatrix`](@ref) for the general case.
+"""
+struct ChebyshevGramMatrix{T, V <: AbstractVector{T}} <: AbstractGramMatrix{T}
+    μ::V
+    n::Int
+end
+
+function ChebyshevGramMatrix(μ::V) where {T, V <: AbstractVector{T}}
+    n = (length(μ)+1)÷2
+    ChebyshevGramMatrix{T, V}(μ, n)
+end
+
+@inline size(G::ChebyshevGramMatrix) = (G.n, G.n)
+@inline getindex(G::ChebyshevGramMatrix, i::Integer, j::Integer) = (G.μ[abs(i-j)+1] + G.μ[i+j-1])/2
+@inline bandwidths(G::ChebyshevGramMatrix{T, <: PaddedVector{T}}) where T = (length(G.μ.args[2])-1, length(G.μ.args[2])-1)
+
+#
+# 2X'W-W*2X = G*J*G'
+# This returns G, where J = [0 1; -1 0], respecting the skew-symmetry of the right-hand side.
+# We use twice the Chebyshev Jacobi matrix so that subsequent arithmetic is easier.
+#
+function compute_skew_generators(W::ChebyshevGramMatrix{T}) where T
+    μ = W.μ
+    n = size(W, 1)
+    G = zeros(T, n, 2)
+    G[n, 1] = one(T)
+    @inbounds @simd for j in 1:n-1
+        G[j, 2] = -(μ[n+2-j] + μ[n+j])/2
+    end
+    G[n, 2] = -μ[2]/2
+    G
+end
+
+function cholesky(W::ChebyshevGramMatrix{T}) where T
+    n = size(W, 1)
+    G = compute_skew_generators(W)
+    L = zeros(T, n, n)
+    c = W[:, 1]
+    ĉ = zeros(T, n)
+    l = zeros(T, n)
+    v = zeros(T, n)
+    row1 = zeros(T, n)
+    fastcholesky!(L, G, c, ĉ, l, v, row1, n)
+    return Cholesky(L, 'L', 0)
+end
+
+function fastcholesky!(L::Matrix{T}, G, c, ĉ, l, v, row1, n) where T  # fills the lower triangle of L; overwrites G, c, ĉ, l, v, row1
+    @inbounds for k in 1:n-1  # no @simd: step k reads c, G and row1 as written by step k-1
+        d = sqrt(c[k])  # so that L[k, k] = c[k]/d = sqrt(c[k])
+        for j in k:n
+            L[j, k] = l[j] = c[j]/d  # column k of L, also kept in l for the updates below
+        end
+        for j in k:n
+            v[j] = G[j, 1]*G[k, 2] - G[j, 2]*G[k, 1]  # entry (j, k) of G*J*G' with J = [0 1; -1 0]
+        end
+        if k == 1  # the first step divides by 2 and uses cst = 2/d; later steps use cst = 1/d
+            for j in 2:n-1
+                ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j] - v[j])/2
+            end
+            ĉ[n] = (c[n-1] + c[1]*row1[n] - row1[1]*c[n] - v[n])/2  # last row: no c[n+1] term
+            cst = 2/d
+        else
+            for j in k+1:n-1
+                ĉ[j] = c[j+1] + c[j-1] + c[k]*row1[j] - row1[k]*c[j] - v[j]
+            end
+            ĉ[n] = c[n-1] + c[k]*row1[n] - row1[k]*c[n] - v[n]  # last row: no c[n+1] term
+            cst = 1/d
+        end
+        for j in k+1:n
+            row1[j] = -cst*l[j]
+        end
+        cst = c[k+1]/d  # read before c is overwritten in the next loop
+        for j in k:n
+            c[j] = ĉ[j] - cst*l[j]
+        end
+        gd1 = G[k, 1]/d
+        gd2 = G[k, 2]/d
+        for j in k:n
+            G[j, 1] -= l[j]*gd1  # subtract l[j]*G[k, :]/d from row j of G
+            G[j, 2] -= l[j]*gd2
+        end
+    end
+    L[n, n] = sqrt(c[n])  # the loop stops at column n-1; set the final diagonal entry
+end
+
+function cholesky(W::ChebyshevGramMatrix{T, <: PaddedVector{T}}) where T
+    n = size(W, 1)
+    G = compute_skew_generators(W)
+    L = BandedMatrix{T}(undef, (n, n), (bandwidth(W, 1), 0))
+    c = W[:, 1]
+    ĉ = zeros(T, n)
+    l = zeros(T, n)
+    v = zeros(T, n)
+    row1 = zeros(T, n)
+    fastcholesky!(L, G, c, ĉ, l, v, row1, n)
+    return Cholesky(L, 'L', 0)
+end
+
+function fastcholesky!(L::BandedMatrix{T}, G, c, ĉ, l, v, row1, n) where T  # banded variant; overwrites G, c, ĉ, l, v, row1
+    b = bandwidth(L, 1)  # lower bandwidth of L limits the row ranges below
+    @inbounds for k in 1:n-1  # no @simd: step k reads c, G and row1 as written by step k-1
+        d = sqrt(c[k])  # so that L[k, k] = c[k]/d = sqrt(c[k])
+        for j in k:min(k+b, n)
+            L[j, k] = l[j] = c[j]/d  # column k of L inside the band, also kept in l
+        end
+        for j in max(k, n-b-1):n
+            v[j] = G[j, 1]*G[k, 2] - G[j, 2]*G[k, 1]  # entry (j, k) of G*J*G' with J = [0 1; -1 0]
+        end
+        if k == 1  # the first step divides by 2 and uses cst = 2/d; later steps use cst = 1/d
+            for j in 2:min(b+2, n-1)
+                ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j] - v[j])/2
+            end
+            if 1 ≥ n-b-1  # row n is only updated when it is within b+1 of the first row
+                ĉ[n] = (c[n-1] + c[1]*row1[n] - row1[1]*c[n] - v[n])/2
+            end
+            cst = 2/d
+        else
+            for j in k+1:min(k+b+1, n-1)
+                ĉ[j] = c[j+1] + c[j-1] + c[k]*row1[j] - row1[k]*c[j] - v[j]
+            end
+            if k ≥ n-b-1  # row n is only updated once k is within b+1 of n
+                ĉ[n] = c[n-1] + c[k]*row1[n] - row1[k]*c[n] - v[n]
+            end
+            cst = 1/d
+        end
+        for j in k+1:min(k+b+1, n)
+            row1[j] = -cst*l[j]
+        end
+        cst = c[k+1]/d  # read before c is overwritten in the next loop
+        for j in k:min(k+b+1, n)
+            c[j] = ĉ[j] - cst*l[j]
+        end
+        gd1 = G[k, 1]/d
+        gd2 = G[k, 2]/d
+        for j in max(k, n-b-1):n
+            G[j, 1] -= l[j]*gd1  # subtract l[j]*G[k, :]/d from row j of G
+            G[j, 2] -= l[j]*gd2
+        end
+    end
+    L[n, n] = sqrt(c[n])  # the loop stops at column n-1; set the final diagonal entry
+end
diff --git a/test/GramMatrixtests.jl b/test/GramMatrixtests.jl
new file mode 100644
index 00000000..594e9182
--- /dev/null
+++ b/test/GramMatrixtests.jl
@@ -0,0 +1,78 @@
+using FastTransforms, BandedMatrices, LazyArrays, LinearAlgebra, Test
+
+@testset "GramMatrix" begin
+    n = 128
+    for T in (Float32, Float64, BigFloat)
+        R = plan_leg2cheb(T, n; normcheb=true)*I
+        X = Tridiagonal([T(n)/(2n-1) for n in 1:n-1], zeros(T, n), [T(n)/(2n+1) for n in 1:n-1]) # Legendre X
+        W = Symmetric(R'R)
+        G = GramMatrix(W, X)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ W
+        @test F.U ≈ R
+
+        R = plan_leg2cheb(T, n; normcheb=true, normleg=true)*I
+        X = SymTridiagonal(zeros(T, n), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n-1]) # normalized Legendre X
+        W = Symmetric(R'R)
+        G = GramMatrix(W, X)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ W
+        @test F.U ≈ R
+
+        b = 4
+        X = BandedMatrix(SymTridiagonal(zeros(T, n+b), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n+b-1])) # normalized Legendre X
+        W = I+X^2+X^4
+        W = Symmetric(W[1:n, 1:n])
+        X = BandedMatrix(SymTridiagonal(zeros(T, n), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n-1])) # normalized Legendre X
+        G = GramMatrix(W, X)
+        @test bandwidths(G) == (b, b)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ W
+    end
+    W = reshape([i for i in 1.0:n^2], n, n)
+    X = reshape([i for i in 1.0:4n^2], 2n, 2n)
+    @test_throws "different sizes" GramMatrix(W, X)
+    X = X[1:n, 1:n]
+    @test_throws "nonsymmetric" GramMatrix(W, X)
+    @test_throws "nontridiagonal" GramMatrix(Symmetric(W), X)
+end
+
+@testset "ChebyshevGramMatrix" begin
+    n = 128
+    for T in (Float32, Float64, BigFloat)
+        μ = FastTransforms.chebyshevmoments1(T, 2n-1)
+        G = ChebyshevGramMatrix(μ)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ G
+        R = plan_cheb2leg(T, n; normleg=true)*I
+        @test F.U ≈ R
+
+        α, β = (T(0.123), T(0.456))
+        μ = FastTransforms.chebyshevjacobimoments1(T, 2n-1, α, β)
+        G = ChebyshevGramMatrix(μ)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ G
+        R = plan_cheb2jac(T, n, α, β; normjac=true)*I
+        @test F.U ≈ R
+
+        μ = -FastTransforms.chebyshevlogmoments1(T, 2n-1)
+        G = ChebyshevGramMatrix(μ)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ G
+
+        μ = FastTransforms.chebyshevabsmoments1(T, 2n-1)
+        G = ChebyshevGramMatrix(μ)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ G
+
+        μ = PaddedVector(T(1) ./ [1,2,3,4,5], 2n-1)
+        G = ChebyshevGramMatrix(μ)
+        @test bandwidths(G) == (4, 4)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ G
+        μd = Vector{T}(μ)
+        Gd = ChebyshevGramMatrix(μd)
+        Fd = cholesky(Gd)
+        @test F.L ≈ Fd.L
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 1c13e255..7561df00 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,5 +10,5 @@ include("gaunttests.jl")
 include("hermitetests.jl")
 include("toeplitzplanstests.jl")
 include("toeplitzhankeltests.jl")
-include("symmetrictoeplitzplushankeltests.jl")
-include("arraystests.jl")
\ No newline at end of file
+include("grammatrixtests.jl")
+include("arraystests.jl")

From 69b731777d6507f97c2f5fb0018e8c9da3f4ebc1 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 1 Nov 2024 14:38:01 -0500
Subject: [PATCH 195/222] standard doc is better for AbstractGramMatrix

---
 docs/src/index.md | 4 ----
 src/GramMatrix.jl | 7 +------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index 503485fa..06d70e5d 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -12,10 +12,6 @@ For this documentation, please see the documentation for [FastTransforms](https:
 
 ### Fast Cholesky factorization of the Gram matrix
 
-```@docs
-AbstractGramMatrix
-```
-
 ```@docs
 GramMatrix
 ```
diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index f5542f42..21017a2d 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -1,8 +1,3 @@
-"""
-    AbstractGramMatrix
-
-Supertype of Gram matrices such as [`GramMatrix`](@ref) and [`ChebyshevGramMatrix`](@ref).
-"""
 abstract type AbstractGramMatrix{T} <: LayoutMatrix{T} end
 
 @inline issymmetric(G::AbstractGramMatrix) = true
@@ -23,7 +18,7 @@ the Gram matrix satisfies the skew-symmetric rank-2 displacement equation (``X =
 ```math
 X^\\top W - WX = GJG^\\top,
 ```
-where ``J = \\begin{pmatrix} 0 & 1\\ -1 & 0\\end{pmatrix}`` and where:
+where ``J = \\begin{pmatrix} 0 & 1\\\\ -1 & 0\\end{pmatrix}`` and where:
 ```math
 G_{:, 1} = e_n,\\quad{\\rm and}\\quad G_{:, 2} = W_{n-1, :}X_{n-1, n} - X^\\top W_{:, n}.
 ```

From 1bd226e6cfcb82b0b83a117c802649fb1409615c Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 1 Nov 2024 14:40:54 -0500
Subject: [PATCH 196/222] bits and bobs

---
 src/GramMatrix.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index 21017a2d..1c2ee6cf 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -7,7 +7,7 @@ abstract type AbstractGramMatrix{T} <: LayoutMatrix{T} end
 """
     GramMatrix(W::AbstractMatrix, X::AbstractMatrix)
 
-Construct a Gram matrix of size ``size(W)`` with data stored in ``W``.
+Construct a symmetric positive-definite Gram matrix with data stored in ``W``.
 Given a family of orthogonal polynomials ``{\\bf P}(x) = \\{p_0(x), p_1(x),\\ldots\\}``
 and a continuous inner product ``\\langle f, g\\rangle``, the Gram matrix is defined by:
 ```math
@@ -161,7 +161,7 @@ end
 """
     ChebyshevGramMatrix(μ::AbstractVector)
 
-Construct a Chebyshev--Gram matrix of size ``(length(μ)+1)÷2`` with entries:
+Construct a Chebyshev--Gram matrix of size `(length(μ)+1)÷2` with entries:
 ```math
 W_{i,j} = \\frac{\\mu_{|i-j|+1} +\\mu_{i+j-1}}{2}.
 ```

From 7f4626586f864d691c476981b747d8313d085906 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 1 Nov 2024 15:14:10 -0500
Subject: [PATCH 197/222] start adding ToeplitzPlusHankel

---
 src/FastTransforms.jl           |   4 +-
 src/GramMatrix.jl               |   2 +-
 src/ToeplitzPlusHankel.jl       | 319 ++++++++++++++++++++++++++++++++
 test/runtests.jl                |   1 +
 test/toeplitzplushankeltests.jl |  11 ++
 5 files changed, 335 insertions(+), 2 deletions(-)
 create mode 100644 src/ToeplitzPlusHankel.jl
 create mode 100644 test/toeplitzplushankeltests.jl

diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 5f401e0f..8a3d5a3c 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -125,7 +125,9 @@ include("specialfunctions.jl")
 include("toeplitzplans.jl")
 include("toeplitzhankel.jl")
 
-include("SymmetricToeplitzPlusHankel.jl")
+export ToeplitzPlusHankel
+
+include("ToeplitzPlusHankel.jl")
 
 # following use libfasttransforms by default
 for f in (:jac2jac,
diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index 1c2ee6cf..11d23c85 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -22,7 +22,7 @@ where ``J = \\begin{pmatrix} 0 & 1\\\\ -1 & 0\\end{pmatrix}`` and where:
 ```math
 G_{:, 1} = e_n,\\quad{\\rm and}\\quad G_{:, 2} = W_{n-1, :}X_{n-1, n} - X^\\top W_{:, n}.
 ```
-Fast (``{\\cal O}(n^2)``) Cholesky factorization of the Gram matrix returns the
+Fast (``O(n^2)``) Cholesky factorization of the Gram matrix returns the
 connection coefficients between ``{\\bf P}(x)`` and the polynomials ``{\\bf Q}(x)``
 orthogonal in the modified inner product, ``{\\bf P}(x) = {\\bf Q}(x) R``.
 """
diff --git a/src/ToeplitzPlusHankel.jl b/src/ToeplitzPlusHankel.jl
new file mode 100644
index 00000000..c1aa2345
--- /dev/null
+++ b/src/ToeplitzPlusHankel.jl
@@ -0,0 +1,319 @@
+struct ToeplitzPlusHankel{T, S, P1 <: Plan{S}, P2 <: Plan{S}} <: AbstractMatrix{T}
+    tc::Vector{T}
+    tr::Vector{T}
+    h::Vector{T}
+    th_dft::Matrix{S}
+    tht_dft::Matrix{S}
+    temp::Matrix{S}
+    plan::P1
+    iplan::P2
+    size::NTuple{2, Int}
+end
+
+# Enforces tr[1] == tc[1] by overwriting tr[1] in place (this mutates the caller's tr).
+function ToeplitzPlusHankel(tc::Vector{T}, tr::Vector{T}, h::Vector{T}) where T
+    m = length(tc)
+    n = length(tr)
+    @assert length(h) == m+n-1
+    tr[1] = tc[1]
+    mn = m+n
+    S = promote_type(float(T), Complex{Float32})
+    th_dft = Matrix{S}(undef, mn, 2)
+    copyto!(th_dft, 1, tc, 1, m)
+    th_dft[m+1, 1] = zero(T)
+    copyto!(th_dft, m+2, Iterators.reverse(tr), 1, n-1)
+    copyto!(th_dft, mn+1, h, n, m)
+    th_dft[m+1, 2] = zero(T)
+    copyto!(th_dft, mn+m+2, h, 1, n-1)
+    tht_dft = Matrix{S}(undef, mn, 2)
+    copyto!(tht_dft, 1, tr, 1, n)
+    tht_dft[n+1, 1] = zero(T)
+    copyto!(tht_dft, n+2, Iterators.reverse(tc), 1, m-1)
+    copyto!(tht_dft, mn+1, h, m, n)
+    tht_dft[n+1, 2] = zero(T)
+    copyto!(tht_dft, mn+n+2, h, 1, m-1)
+
+    plan = plan_fft!(th_dft, 1)
+    plan*th_dft
+    plan*tht_dft
+    temp = zeros(S, mn, 2)
+    iplan = inv(plan)
+
+    ToeplitzPlusHankel{T, S, typeof(plan), typeof(iplan)}(tc, tr, h, th_dft, tht_dft, temp, plan, iplan, (m, n))
+end
+
+# A ChebyshevGramMatrix isa (symmetric positive-definite) ToeplitzPlusHankel matrix.
+function ToeplitzPlusHankel(G::ChebyshevGramMatrix)
+    n = size(G, 1)
+    ToeplitzPlusHankel(G.μ[1:n]/2, G.μ[1:n]/2, G.μ/2)
+end
+
+size(A::ToeplitzPlusHankel) = A.size
+getindex(A::ToeplitzPlusHankel, i::Integer, j::Integer) = (i ≥ j ? A.tc[i-j+1] : A.tr[j-i+1]) + A.h[i+j-1]
+
+# A contiguous sub-block of a T+H is also T+H; range indexing returns a new ToeplitzPlusHankel (a copy, not a view).
+function getindex(A::ToeplitzPlusHankel, ir::UnitRange{Int}, jr::UnitRange{Int})
+    fir, lir = first(ir), last(ir)
+    fjr, ljr = first(jr), last(jr)
+    if fir ≥ fjr
+        tc = A.tc[fir-fjr+1:lir-fjr+1]
+        tr = [A.tc[fir-fjr+1:-1:max(1, fir-ljr+1)]; A.tr[2:ljr-fir+1]]
+    else
+        tc = [A.tr[fjr-fir+1:-1:max(1, fjr-lir+1)]; A.tc[2:lir-fjr+1]]
+        tr = A.tr[fjr-fir+1:ljr-fir+1]
+    end
+    ToeplitzPlusHankel(tc, tr, A.h[fir+fjr-1:lir+ljr-1])
+end
+
+
+# y ← A x α + y β
+function mul!(y::StridedVector{T}, A::ToeplitzPlusHankel{T}, x::StridedVector{T}, α::S, β::S) where {T <: Real, S <: Real}
+    m, n = size(A)
+    @assert m == length(y)
+    @assert n == length(x)
+    mn = m+n
+    th_dft = A.th_dft
+    temp = A.temp
+    plan = A.plan
+    iplan = A.iplan
+
+    copyto!(temp, 1, x, 1, n)
+    copyto!(temp, mn+1, Iterators.reverse(x), 1, n)
+    @inbounds for j in n+1:mn
+        temp[j, 1] = zero(T)
+        temp[j, 2] = zero(T)
+    end
+    plan*temp
+    temp .*= th_dft
+    iplan*temp
+
+    if iszero(β)
+        @inbounds @simd for i in 1:m
+            y[i] = α * (real(temp[i, 1])+real(temp[i, 2]))
+        end
+    else
+        @inbounds @simd for i in 1:m
+            y[i] = α * (real(temp[i, 1])+real(temp[i, 2])) + β*y[i]
+        end
+    end
+    return y
+end
+
+# y ← A' x α + y β
+function mul!(y::StridedVector{T}, A::Adjoint{T, <:ToeplitzPlusHankel{T}}, x::StridedVector{T}, α::S, β::S) where {T <: Real, S <: Real}
+    m, n = size(A)
+    @assert m == length(y)
+    @assert n == length(x)
+    mn = m+n
+    AP = A.parent
+    tht_dft = AP.tht_dft
+    temp = AP.temp
+    plan = AP.plan
+    iplan = AP.iplan
+
+    copyto!(temp, 1, x, 1, n)
+    copyto!(temp, mn+1, Iterators.reverse(x), 1, n)
+    @inbounds for j in n+1:mn
+        temp[j, 1] = zero(T)
+        temp[j, 2] = zero(T)
+    end
+    plan*temp
+    temp .*= tht_dft
+    iplan*temp
+
+    if iszero(β)
+        @inbounds @simd for i in 1:m
+            y[i] = α * (real(temp[i, 1])+real(temp[i, 2]))
+        end
+    else
+        @inbounds @simd for i in 1:m
+            y[i] = α * (real(temp[i, 1])+real(temp[i, 2])) + β*y[i]
+        end
+    end
+    return y
+end
+
+
+# C ← A B α + C β
+function mul!(C::StridedMatrix{T}, A::ToeplitzPlusHankel{T}, B::StridedMatrix{T}, α::S, β::S) where {T <: Real, S <: Real}
+    m, n = size(A)
+    @assert m == size(C, 1)
+    @assert n == size(B, 1)
+    p = size(B, 2)
+    if size(C, 2) != p
+        throw(DimensionMismatch("input and output matrices must have same number of columns"))
+    end
+
+    th_dft = A.th_dft
+    TC = promote_type(float(T), Complex{Float32})
+    temp = zeros(TC, m+n, 2p)
+    plan = plan_fft!(temp, 1)
+
+    for k in 1:p
+        copyto!(view(temp, :, 2k-1), 1, view(B, :, k), 1, n)
+        copyto!(view(temp, :, 2k), 1, Iterators.reverse(view(B, :, k)), 1, n)
+    end
+    plan*temp
+    for k in 1:p
+        vt = view(temp, :, 2k-1:2k)
+        vt .*= th_dft
+    end
+    plan\temp
+
+    if iszero(β)
+        @inbounds for k in 1:p
+            for i in 1:m
+                C[i, k] = α * (real(temp[i, 2k-1])+real(temp[i, 2k]))
+            end
+        end
+    else
+        @inbounds for k in 1:p
+            for i in 1:m
+                C[i, k] = α * (real(temp[i, 2k-1])+real(temp[i, 2k])) + β*C[i, k]
+            end
+        end
+    end
+    return C
+end
+
+# C ← A B α + C β with B a ToeplitzPlusHankel. Morally equivalent to mul!(C', B', A', α, β)' with StridedMatrix replaced by AbstractMatrix below
+function mul!(C::StridedMatrix{T}, A::StridedMatrix{T}, B::ToeplitzPlusHankel{T}, α::S, β::S) where {T <: Real, S <: Real}
+    n, m = size(B)
+    @assert m == size(C, 2)
+    @assert n == size(A, 2)
+    p = size(A, 1)
+    if size(C, 1) != p
+        throw(DimensionMismatch("input and output matrices must have same number of rows"))
+    end
+
+    tht_dft = B.tht_dft
+    TC = promote_type(float(T), Complex{Float32})
+    temp = zeros(TC, m+n, 2p)
+    plan = plan_fft!(temp, 1)
+
+    for k in 1:p
+        copyto!(view(temp, :, 2k-1), 1, view(A, k, :), 1, n)
+        copyto!(view(temp, :, 2k), 1, Iterators.reverse(view(A, k, :)), 1, n)
+    end
+    plan*temp
+    for k in 1:p
+        vt = view(temp, :, 2k-1:2k)
+        vt .*= tht_dft
+    end
+    plan\temp
+
+    if iszero(β)
+        @inbounds for k in 1:p
+            for i in 1:m
+                C[k, i] = α * (real(temp[i, 2k-1])+real(temp[i, 2k]))
+            end
+        end
+    else
+        @inbounds for k in 1:p
+            for i in 1:m
+                C[k, i] = α * (real(temp[i, 2k-1])+real(temp[i, 2k])) + β*C[k, i]
+            end
+        end
+    end
+    return C
+end
+
+# C ← A' B α + C β
+function mul!(C::StridedMatrix{T}, A::Adjoint{T, <:ToeplitzPlusHankel{T}}, B::StridedMatrix{T}, α::S, β::S) where {T <: Real, S <: Real}
+    m, n = size(A)
+    @assert m == size(C, 1)
+    @assert n == size(B, 1)
+    p = size(B, 2)
+    if size(C, 2) != p
+        throw(DimensionMismatch("input and output matrices must have same number of columns"))
+    end
+
+    tht_dft = A.parent.tht_dft
+    TC = promote_type(float(T), Complex{Float32})
+    temp = zeros(TC, m+n, 2p)
+    plan = plan_fft!(temp, 1)
+
+    for k in 1:p
+        copyto!(view(temp, :, 2k-1), 1, view(B, :, k), 1, n)
+        copyto!(view(temp, :, 2k), 1, Iterators.reverse(view(B, :, k)), 1, n)
+    end
+    plan*temp
+    for k in 1:p
+        vt = view(temp, :, 2k-1:2k)
+        vt .*= tht_dft
+    end
+    plan\temp
+
+    if iszero(β)
+        @inbounds for k in 1:p
+            for i in 1:m
+                C[i, k] = α * (real(temp[i, 2k-1])+real(temp[i, 2k]))
+            end
+        end
+    else
+        @inbounds for k in 1:p
+            for i in 1:m
+                C[i, k] = α * (real(temp[i, 2k-1])+real(temp[i, 2k])) + β*C[i, k]
+            end
+        end
+    end
+    return C
+end
+
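+# Estimate the Frobenius norm of the Toeplitz-plus-Hankel matrix by working with the symbols: the Toeplitz (tc, tr) and Hankel (h) contributions are accumulated separately and their square roots are added.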
+function normest(A::ToeplitzPlusHankel{T}) where T
+    m, n = size(A)
+    tc = A.tc
+    tr = A.tr
+    h = A.h
+    ret1 = zero(T)
+    ret2 = zero(T)
+    if m == min(m, n)
+        for i = 1:m
+            ret1 += (m+1-i)*abs2(tc[i])
+        end
+        for i = 2:n-m
+            ret1 += m*abs2(tr[i])
+        end
+        for i = n-m+1:n
+            ret1 += (n-i)*abs2(tr[i])
+        end
+        for i = 1:m
+            ret2 += i*abs2(h[i])
+        end
+        for i = m+1:n
+            ret2 += m*abs2(h[i])
+        end
+        for i = n+1:m+n-1
+            ret2 += (m+n-i)*abs2(h[i])
+        end
+    else
+        for i = 1:n
+            ret1 += (n+1-i)*abs2(tr[i])
+        end
+        for i = 2:m-n
+            ret1 += n*abs2(tc[i])
+        end
+        for i = m-n+1:m
+            ret1 += (m-i)*abs2(tc[i])
+        end
+        for i = 1:n
+            ret2 += i*abs2(h[i])
+        end
+        for i = n+1:m
+            ret2 += n*abs2(h[i])
+        end
+        for i = m+1:m+n-1
+            ret2 += (m+n-i)*abs2(h[i])
+        end
+    end
+    sqrt(ret1) + sqrt(ret2)
+end
+
+normest(A::Symmetric{T, <: ToeplitzPlusHankel{T}}) where T = normest(parent(A))
+normest(A::Hermitian{T, <: ToeplitzPlusHankel{T}}) where T = normest(parent(A))
+
+function normest(A::ChebyshevGramMatrix{T}) where T
+    n = size(A, 1)
+    normest(A[1:n, 1:n])
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 7561df00..36c95de8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,5 +10,6 @@ include("gaunttests.jl")
 include("hermitetests.jl")
 include("toeplitzplanstests.jl")
 include("toeplitzhankeltests.jl")
+include("toeplitzplushankeltests.jl")
 include("grammatrixtests.jl")
 include("arraystests.jl")
diff --git a/test/toeplitzplushankeltests.jl b/test/toeplitzplushankeltests.jl
new file mode 100644
index 00000000..9a2130ad
--- /dev/null
+++ b/test/toeplitzplushankeltests.jl
@@ -0,0 +1,11 @@
+using FastTransforms, LinearAlgebra, Test
+
+@testset "ToeplitzPlusHankel" begin
+    n = 128
+    for T in (Float32, Float64)
+        μ = FastTransforms.chebyshevmoments1(T, 2n-1)
+        G = ChebyshevGramMatrix(μ)
+        TpH = ToeplitzPlusHankel(G)
+        @test TpH ≈ G
+    end
+end

From 1e05dd6e9cb90893d19769f7bd955906f1abc613 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 1 Nov 2024 16:00:33 -0500
Subject: [PATCH 198/222] remove STPH and fix normest

---
 src/SymmetricToeplitzPlusHankel.jl       | 215 -----------------------
 src/ToeplitzPlusHankel.jl                |  16 +-
 test/symmetrictoeplitzplushankeltests.jl |  51 ------
 test/toeplitzplushankeltests.jl          |   4 +
 4 files changed, 10 insertions(+), 276 deletions(-)
 delete mode 100644 src/SymmetricToeplitzPlusHankel.jl
 delete mode 100644 test/symmetrictoeplitzplushankeltests.jl

diff --git a/src/SymmetricToeplitzPlusHankel.jl b/src/SymmetricToeplitzPlusHankel.jl
deleted file mode 100644
index 1084977b..00000000
--- a/src/SymmetricToeplitzPlusHankel.jl
+++ /dev/null
@@ -1,215 +0,0 @@
-struct SymmetricToeplitzPlusHankel{T} <: AbstractMatrix{T}
-    v::Vector{T}
-    n::Int
-end
-
-function SymmetricToeplitzPlusHankel(v::Vector{T}) where T
-    n = (length(v)+1)÷2
-    SymmetricToeplitzPlusHankel{T}(v, n)
-end
-
-size(A::SymmetricToeplitzPlusHankel{T}) where T = (A.n, A.n)
-getindex(A::SymmetricToeplitzPlusHankel{T}, i::Integer, j::Integer) where T = A.v[abs(i-j)+1] + A.v[i+j-1]
-
-struct SymmetricBandedToeplitzPlusHankel{T} <: BandedMatrices.AbstractBandedMatrix{T}
-    v::Vector{T}
-    n::Int
-    b::Int
-end
-
-function SymmetricBandedToeplitzPlusHankel(v::Vector{T}, n::Integer) where T
-    SymmetricBandedToeplitzPlusHankel{T}(v, n, length(v)-1)
-end
-
-size(A::SymmetricBandedToeplitzPlusHankel{T}) where T = (A.n, A.n)
-function getindex(A::SymmetricBandedToeplitzPlusHankel{T}, i::Integer, j::Integer) where T
-    v = A.v
-    if abs(i-j) < length(v)
-        if i+j-1 ≤ length(v)
-            v[abs(i-j)+1] + v[i+j-1]
-        else
-            v[abs(i-j)+1]
-        end
-    else
-        zero(T)
-    end
-end
-bandwidths(A::SymmetricBandedToeplitzPlusHankel{T}) where T = (A.b, A.b)
-
-#
-# X'W-W*X = G*J*G'
-# This returns G, where J = [0 1; -1 0], respecting the skew-symmetry of the right-hand side.
-#
-function compute_skew_generators(A::SymmetricToeplitzPlusHankel{T}) where T
-    v = A.v
-    n = size(A, 1)
-    G = zeros(T, n, 2)
-    G[n, 1] = one(T)
-    @inbounds @simd for j in 1:n-1
-        G[j, 2] = -(v[n+2-j] + v[n+j])
-    end
-    G[n, 2] = -v[2]
-    G
-end
-
-function cholesky(A::SymmetricToeplitzPlusHankel{T}) where T
-    n = size(A, 1)
-    G = compute_skew_generators(A)
-    L = zeros(T, n, n)
-    c = A[:, 1]
-    ĉ = zeros(T, n)
-    l = zeros(T, n)
-    v = zeros(T, n)
-    row1 = zeros(T, n)
-    STPHcholesky!(L, G, c, ĉ, l, v, row1, n)
-    return Cholesky(L, 'L', 0)
-end
-
-function STPHcholesky!(L::Matrix{T}, G, c, ĉ, l, v, row1, n) where T
-    @inbounds @simd for k in 1:n-1
-        d = sqrt(c[1])
-        for j in 1:n-k+1
-            L[j+k-1, k] = l[j] = c[j]/d
-        end
-        for j in 1:n-k+1
-            v[j] = G[j, 1]*G[1, 2] - G[j, 2]*G[1, 1]
-        end
-        if k == 1
-            for j in 2:n-k
-                ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j] - v[j])/2
-            end
-            ĉ[n-k+1] = (c[n-k] + c[1]*row1[n-k+1] - row1[1]*c[n-k+1] - v[n-k+1])/2
-            cst = 2/d
-            for j in 1:n-k
-                row1[j] = -cst*l[j+1]
-            end
-        else
-            for j in 2:n-k
-                ĉ[j] = c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j] - v[j]
-            end
-            ĉ[n-k+1] = c[n-k] + c[1]*row1[n-k+1] - row1[1]*c[n-k+1] - v[n-k+1]
-            cst = 1/d
-            for j in 1:n-k
-                row1[j] = -cst*l[j+1]
-            end
-        end
-        cst = c[2]/d
-        for j in 1:n-k
-            c[j] = ĉ[j+1] - cst*l[j+1]
-        end
-        gd1 = G[1, 1]/d
-        gd2 = G[1, 2]/d
-        for j in 1:n-k
-            G[j, 1] = G[j+1, 1] - l[j+1]*gd1
-            G[j, 2] = G[j+1, 2] - l[j+1]*gd2
-        end
-    end
-    L[n, n] = sqrt(c[1])
-end
-
-function cholesky(A::SymmetricBandedToeplitzPlusHankel{T}) where T
-    n = size(A, 1)
-    b = A.b
-    L = BandedMatrix{T}(undef, (n, n), (bandwidth(A, 1), 0))
-    c = A[1:b+2, 1]
-    ĉ = zeros(T, b+3)
-    l = zeros(T, b+3)
-    row1 = zeros(T, b+2)
-    SBTPHcholesky!(L, c, ĉ, l, row1, n, b)
-    return Cholesky(L, 'L', 0)
-end
-
-function SBTPHcholesky!(L::BandedMatrix{T}, c, ĉ, l, row1, n, b) where T
-    @inbounds @simd for k in 1:n
-        d = sqrt(c[1])
-        for j in 1:b+1
-            l[j] = c[j]/d
-        end
-        for j in 1:min(n-k+1, b+1)
-            L[j+k-1, k] = l[j]
-        end
-        if k == 1
-            for j in 2:b+1
-                ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j])/2
-            end
-            ĉ[b+2] = (c[b+1] + c[1]*row1[b+2] - row1[1]*c[b+2])/2
-            cst = 2/d
-            for j in 1:b+2
-                row1[j] = -cst*l[j+1]
-            end
-        else
-            for j in 2:b+1
-                ĉ[j] = (c[j+1] + c[j-1] + c[1]*row1[j] - row1[1]*c[j])
-            end
-            ĉ[b+2] = (c[b+1] + c[1]*row1[b+2] - row1[1]*c[b+2])
-            cst = 1/d
-            for j in 1:b+2
-                row1[j] = -cst*l[j+1]
-            end
-        end
-        cst = c[2]/d
-        for j in 1:b+2
-            c[j] = ĉ[j+1] - cst*l[j+1]
-        end
-    end
-end
-
-
-
-#
-# X'W-W*X = G*J*G'
-# This returns G, where J = [0 1; -1 0], respecting the skew-symmetry of the right-hand side.
-#
-function compute_skew_generators(W::Symmetric{T, <: AbstractMatrix{T}}, X::Tridiagonal{T, Vector{T}}) where T
-    @assert size(W) == size(X)
-    m, n = size(W)
-    G = zeros(T, n, 2)
-    G[n, 1] = one(T)
-    G[:, 2] .= W[n-1, :]*X[n-1, n] - X'W[:, n]
-    return G
-end
-
-function fastcholesky(W::Symmetric{T, <: AbstractMatrix{T}}, X::Tridiagonal{T, Vector{T}}) where T
-    n = size(W, 1)
-    G = compute_skew_generators(W, X)
-    L = zeros(T, n, n)
-    c = W[:, 1]
-    ĉ = zeros(T, n)
-    l = zeros(T, n)
-    v = zeros(T, n)
-    row1 = zeros(T, n)
-    fastcholesky!(L, X, G, c, ĉ, l, v, row1, n)
-    return Cholesky(L, 'L', 0)
-end
-
-
-function fastcholesky!(L::Matrix{T}, X::Tridiagonal{T, Vector{T}}, G, c, ĉ, l, v, row1, n) where T
-    @inbounds @simd for k in 1:n-1
-        d = sqrt(c[k])
-        for j in k:n
-            L[j, k] = l[j] = c[j]/d
-        end
-        for j in k:n
-            v[j] = G[j, 1]*G[k, 2] - G[j, 2]*G[k, 1]
-        end
-        for j in k+1:n-1
-            ĉ[j] = (X[j-1, j]*c[j-1] + (X[j, j]-X[k, k])*c[j] + X[j+1, j]*c[j+1] + c[k]*row1[j] - row1[k]*c[j] - v[j])/X[k+1, k]
-        end
-        ĉ[n] = (X[n-1, n]*c[n-1] + (X[n, n]-X[k, k])*c[n] + c[k]*row1[n] - row1[k]*c[n] - v[n])/X[k+1, k]
-        cst = X[k+1, k]/d
-        for j in k+1:n
-            row1[j] = -cst*l[j]
-        end
-        cst = c[k+1]/d
-        for j in k:n
-            c[j] = ĉ[j] - cst*l[j]
-        end
-        gd1 = G[k, 1]/d
-        gd2 = G[k, 2]/d
-        for j in k:n
-            G[j, 1] -= l[j]*gd1
-            G[j, 2] -= l[j]*gd2
-        end
-    end
-    L[n, n] = sqrt(c[n])
-end
diff --git a/src/ToeplitzPlusHankel.jl b/src/ToeplitzPlusHankel.jl
index c1aa2345..aee6bbf9 100644
--- a/src/ToeplitzPlusHankel.jl
+++ b/src/ToeplitzPlusHankel.jl
@@ -275,8 +275,8 @@ function normest(A::ToeplitzPlusHankel{T}) where T
         for i = 2:n-m
             ret1 += m*abs2(tr[i])
         end
-        for i = n-m+1:n
-            ret1 += (n-i)*abs2(tr[i])
+        for i = max(n-m+1, 2):n
+            ret1 += (n+1-i)*abs2(tr[i])
         end
         for i = 1:m
             ret2 += i*abs2(h[i])
@@ -294,8 +294,8 @@ function normest(A::ToeplitzPlusHankel{T}) where T
         for i = 2:m-n
             ret1 += n*abs2(tc[i])
         end
-        for i = m-n+1:m
-            ret1 += (m-i)*abs2(tc[i])
+        for i = max(m-n+1, 2):m
+            ret1 += (m+1-i)*abs2(tc[i])
         end
         for i = 1:n
             ret2 += i*abs2(h[i])
@@ -310,10 +310,6 @@ function normest(A::ToeplitzPlusHankel{T}) where T
     sqrt(ret1) + sqrt(ret2)
 end
 
-normest(A::Symmetric{T, <: ToeplitzPlusHankel{T}}) where T = normest(parent(A))
+normest(A::Symmetric{T, <: ToeplitzPlusHankel{T}}) where T = normest(parent(A))+1
 normest(A::Hermitian{T, <: ToeplitzPlusHankel{T}}) where T = normest(parent(A))
-
-function normest(A::ChebyshevGramMatrix{T}) where T
-    n = size(A, 1)
-    normest(A[1:n, 1:n])
-end
+normest(A::ChebyshevGramMatrix{T}) where T = normest(ToeplitzPlusHankel(A))
diff --git a/test/symmetrictoeplitzplushankeltests.jl b/test/symmetrictoeplitzplushankeltests.jl
deleted file mode 100644
index 8f37e775..00000000
--- a/test/symmetrictoeplitzplushankeltests.jl
+++ /dev/null
@@ -1,51 +0,0 @@
-using BandedMatrices, FastTransforms, LinearAlgebra, ToeplitzMatrices, Test
-
-import FastTransforms: SymmetricToeplitzPlusHankel, SymmetricBandedToeplitzPlusHankel
-
-@testset "SymmetricToeplitzPlusHankel" begin
-    n = 128
-    for T in (Float32, Float64, BigFloat)
-        μ = -FastTransforms.chebyshevlogmoments1(T, 2n-1)
-        μ[1] += 1
-        W = SymmetricToeplitzPlusHankel(μ/2)
-        SMW = Symmetric(Matrix(W))
-        @test W ≈ SymmetricToeplitz(μ[1:(length(μ)+1)÷2]/2) + Hankel(μ/2)
-        L = cholesky(W).L
-        R = cholesky(SMW).U
-        @test L*L' ≈ W
-        @test L' ≈ R
-    end
-end
-
-@testset "SymmetricBandedToeplitzPlusHankel" begin
-    n = 1024
-    for T in (Float32, Float64)
-        μ = T[1.875; 0.00390625; 0.5; 0.0009765625; 0.0625]
-        W = SymmetricBandedToeplitzPlusHankel(μ/2, n)
-        SBW = Symmetric(BandedMatrix(W))
-        W1 = SymmetricToeplitzPlusHankel([μ/2; zeros(2n-1-length(μ))])
-        SMW = Symmetric(Matrix(W))
-        U = cholesky(SMW).U
-        L = cholesky(W1).L
-        UB = cholesky(SBW).U
-        R = cholesky(W).U
-        @test L*L' ≈ W
-        @test UB'UB ≈ W
-        @test R'R ≈ W
-        @test UB ≈ U
-        @test L' ≈ U
-        @test R ≈ U
-    end
-end
-
-@testset "Fast Cholesky" begin
-    n = 128
-    for T in (Float32, Float64, BigFloat)
-        R = plan_leg2cheb(T, n; normcheb=true)*I
-        X = Tridiagonal([T(n)/(2n-1) for n in 1:n-1], zeros(T, n), [T(n)/(2n+1) for n in 1:n-1]) # Legendre X
-        W = Symmetric(R'R)
-        F = FastTransforms.fastcholesky(W, X)
-        @test F.L*F.L' ≈ W
-        @test F.U ≈ R
-    end
-end
diff --git a/test/toeplitzplushankeltests.jl b/test/toeplitzplushankeltests.jl
index 9a2130ad..4e0c5633 100644
--- a/test/toeplitzplushankeltests.jl
+++ b/test/toeplitzplushankeltests.jl
@@ -1,5 +1,7 @@
 using FastTransforms, LinearAlgebra, Test
 
+import FastTransforms: normest
+
 @testset "ToeplitzPlusHankel" begin
     n = 128
     for T in (Float32, Float64)
@@ -7,5 +9,7 @@ using FastTransforms, LinearAlgebra, Test
         G = ChebyshevGramMatrix(μ)
         TpH = ToeplitzPlusHankel(G)
         @test TpH ≈ G
+        @test norm(TpH) ≤ normest(TpH)
+        @test normest(TpH) == normest(G)
     end
 end

From c0532d38615fc023d766393daf8876fe6eaafc2f Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 4 Nov 2024 09:23:34 -0600
Subject: [PATCH 199/222] remove +1

---
 src/ToeplitzPlusHankel.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ToeplitzPlusHankel.jl b/src/ToeplitzPlusHankel.jl
index aee6bbf9..535184b7 100644
--- a/src/ToeplitzPlusHankel.jl
+++ b/src/ToeplitzPlusHankel.jl
@@ -310,6 +310,6 @@ function normest(A::ToeplitzPlusHankel{T}) where T
     sqrt(ret1) + sqrt(ret2)
 end
 
-normest(A::Symmetric{T, <: ToeplitzPlusHankel{T}}) where T = normest(parent(A))+1
+normest(A::Symmetric{T, <: ToeplitzPlusHankel{T}}) where T = normest(parent(A))
 normest(A::Hermitian{T, <: ToeplitzPlusHankel{T}}) where T = normest(parent(A))
 normest(A::ChebyshevGramMatrix{T}) where T = normest(ToeplitzPlusHankel(A))

From 49478453f516a54eb3b171d746ddeeec9839420a Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 4 Nov 2024 12:48:11 -0600
Subject: [PATCH 200/222] add a test for nonzero diagonal jacobi matrices

---
 src/GramMatrix.jl       | 4 ++--
 test/GramMatrixtests.jl | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index 11d23c85..1c438f37 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -56,10 +56,10 @@ GramMatrix(W::WT, X::XT) where {T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix
 #
 function compute_skew_generators(W::GramMatrix{T}) where T
     X = W.X
-    m, n = size(W)
+    n = size(W, 1)
     G = zeros(T, n, 2)
     G[n, 1] = one(T)
-    G[:, 2] .= W[n-1, :]*X[n-1, n] - X'W[:, n]
+    G[:, 2] .= W[n-1, :]*X[n-1, n] + W[n, :]*X[n, n] - X'W[:, n]
     return G
 end
 
diff --git a/test/GramMatrixtests.jl b/test/GramMatrixtests.jl
index 594e9182..3ac9d626 100644
--- a/test/GramMatrixtests.jl
+++ b/test/GramMatrixtests.jl
@@ -28,6 +28,15 @@ using FastTransforms, BandedMatrices, LazyArrays, LinearAlgebra, Test
         @test bandwidths(G) == (b, b)
         F = cholesky(G)
         @test F.L*F.L' ≈ W
+
+        X = BandedMatrix(SymTridiagonal(T[2n-1 for n in 1:n+b], T[-n for n in 1:n+b-1])) # Laguerre X, tests nonzero diagonal
+        W = I+X^2+X^4
+        W = Symmetric(W[1:n, 1:n])
+        X = BandedMatrix(SymTridiagonal(T[2n-1 for n in 1:n], T[-n for n in 1:n-1])) # Laguerre X
+        G = GramMatrix(W, X)
+        @test bandwidths(G) == (b, b)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ W
     end
     W = reshape([i for i in 1.0:n^2], n, n)
     X = reshape([i for i in 1.0:4n^2], 2n, 2n)

From 7d91fc38a408e8f07306fd9e26489b05a29eebea Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 4 Nov 2024 12:54:28 -0600
Subject: [PATCH 201/222] swap W indexing to make more sense

---
 src/GramMatrix.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index 1c438f37..53210052 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -59,7 +59,7 @@ function compute_skew_generators(W::GramMatrix{T}) where T
     n = size(W, 1)
     G = zeros(T, n, 2)
     G[n, 1] = one(T)
-    G[:, 2] .= W[n-1, :]*X[n-1, n] + W[n, :]*X[n, n] - X'W[:, n]
+    G[:, 2] .= W[:, n-1]*X[n-1, n] + W[:, n]*X[n, n] - X'W[:, n]
     return G
 end
 

From 0b0bbb2b167aa8c2a95b2d190e06d7798f9c2bd8 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 21 Nov 2024 12:26:15 -0600
Subject: [PATCH 202/222] add semiclassical example

---
 docs/Project.toml         |  1 +
 docs/make.jl              |  2 ++
 examples/semiclassical.jl | 66 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+)
 create mode 100644 examples/semiclassical.jl

diff --git a/docs/Project.toml b/docs/Project.toml
index c96b3cd3..969efecb 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,4 +1,5 @@
 [deps]
+ApproxFun = "28f2ccd6-bb30-5033-b560-165f7b14dc2f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FastTransforms = "057dd010-8810-581a-b7be-e3fc3b93f78c"
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
diff --git a/docs/make.jl b/docs/make.jl
index dd530e8f..b6a44b03 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -13,6 +13,7 @@ examples = [
     "halfrange.jl",
     "nonlocaldiffusion.jl",
     "padua.jl",
+    "semiclassical.jl",
     "sphere.jl",
     "spinweighted.jl",
     "subspaceangles.jl",
@@ -47,6 +48,7 @@ makedocs(
                         "generated/halfrange.md",
                         "generated/nonlocaldiffusion.md",
                         "generated/padua.md",
+                        "generated/semiclassical.md",
                         "generated/sphere.md",
                         "generated/spinweighted.md",
                         "generated/subspaceangles.md",
diff --git a/examples/semiclassical.jl b/examples/semiclassical.jl
new file mode 100644
index 00000000..6b3595b9
--- /dev/null
+++ b/examples/semiclassical.jl
@@ -0,0 +1,66 @@
+# # Semi-classical Jacobi polynomials
+# In this example, we will consider the semi-classical orthogonal polynomials with respect to the inner product:
+# ```math
+# \langle f, g \rangle = \int_{-1}^1 f(x) g(x) w(x){\rm d} x,
+# ```
+# where $w(x) = w^{(\alpha,\beta,\gamma,\delta,\epsilon)}(x) = (1-x)^\alpha(1+x)^\beta(2+x)^\gamma(3+x)^\delta(5-x)^\epsilon$ is a modification of the Jacobi weight.
+# We shall use results from [this paper](https://arxiv.org/abs/2302.08448) to consider these semi-classical orthogonal polynomials as modifications of the Jacobi polynomials $P_n^{(\alpha,\beta)}(x)$.
+
+using ApproxFun, FastTransforms, LinearAlgebra, Plots, LaTeXStrings
+const GENFIGS = joinpath(pkgdir(FastTransforms), "docs/src/generated")
+!isdir(GENFIGS) && mkdir(GENFIGS)
+plotlyjs()
+
+# We set the five parameters:
+α,β,γ,δ,ϵ = -0.125, -0.25, 0.123, 0.456, 0.789
+
+# We use `ApproxFun` to construct a finite normalized Jacobi series as a proxy for $(2+x)^\gamma(3+x)^\delta(5-x)^\epsilon$.
+u = Fun(x->(2+x)^γ*(3+x)^δ*(5-x)^ϵ, NormalizedJacobi(β, α))
+
+# Our working polynomial degree will be:
+n = 100
+
+# We compute the connection coefficients between the modified orthogonal polynomials and the Jacobi polynomials:
+P = plan_modifiedjac2jac(Float64, n+1, α, β, u.coefficients)
+
+# We store the connection to first kind Chebyshev polynomials:
+P1 = plan_jac2cheb(Float64, n+1, α, β; normjac = true)
+
+# We compute the Chebyshev series for the degree-$k\le n$ modified polynomial and its values at the Chebyshev points:
+q = k -> lmul!(P1, lmul!(P, [zeros(k); 1.0; zeros(n-k)]))
+qvals = k-> ichebyshevtransform(q(k))
+
+# With the symmetric Jacobi matrix for $P_n^{(\alpha, \beta)}(x)$ and the modified plan, we may compute the modified Jacobi matrix and the corresponding roots (as eigenvalues):
+x = Fun(x->x, NormalizedJacobi(β, α))
+XP = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:n, 1:n]))
+XQ = FastTransforms.modified_jacobi_matrix(P, XP)
+SymTridiagonal(XQ.dv[1:10], XQ.ev[1:9])
+
+# And we plot:
+x = chebyshevpoints(Float64, n+1, Val(1))
+p = plot(x, qvals(0); linewidth=2.0, legend = false, xlim=(-1,1), xlabel=L"x",
+         ylabel=L"Q_n(x)", title="Semi-classical Jacobi Polynomials and Their Roots",
+         extra_plot_kwargs = KW(:include_mathjax => "cdn"))
+for k in 1:10
+    λ = eigvals(SymTridiagonal(XQ.dv[1:k], XQ.ev[1:k-1]))
+    plot!(x, qvals(k); linewidth=2.0, color=palette(:default)[k+1])
+    scatter!(λ, zero(λ); markersize=2.5, color=palette(:default)[k+1])
+end
+p
+savefig(joinpath(GENFIGS, "semiclassical.html"))
+###```@raw html
+###<object type="text/html" data="../semiclassical.html" style="width:100%;height:400px;"></object>
+###```
+
+# By [Theorem 2.20](https://arxiv.org/abs/2302.08448) it turns out that the *derivatives* of these particular semi-classical Jacobi polynomials are a linear combination of at most four polynomials orthogonal with respect to $(1-x)^{\alpha+1}(1+x)^{\beta+1}(2+x)^{\gamma+1}(3+x)^{\delta+1}(5-x)^{\epsilon+1}$ on $(-1,1)$. This fact enables us to compute the banded differentiation matrix:
+v = Fun(x->(2+x)^(γ+1)*(3+x)^(δ+1)*(5-x)^(ϵ+1), NormalizedJacobi(β+1, α+1))
+function threshold!(A::AbstractArray, ϵ)
+    for i in eachindex(A)
+        if abs(A[i]) < ϵ A[i] = 0 end
+    end
+    A
+end
+P′ = plan_modifiedjac2jac(Float64, n+1, α+1, β+1, v.coefficients)
+DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(-1/2,0)}(y) = P^{(1/2,1)}(y) D_P.
+DQ = UpperTriangular(threshold!(P′\(DP*(P*I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(y) = Q̂(y) D_Q.
+UpperTriangular(DQ[1:10,1:10])

From 18d12a7cc0b2f7e864c0ec4eb39f905c0c59e286 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 21 Nov 2024 12:36:12 -0600
Subject: [PATCH 203/222] minor changes

---
 examples/semiclassical.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/semiclassical.jl b/examples/semiclassical.jl
index 6b3595b9..a19ca463 100644
--- a/examples/semiclassical.jl
+++ b/examples/semiclassical.jl
@@ -52,7 +52,7 @@ savefig(joinpath(GENFIGS, "semiclassical.html"))
 ###<object type="text/html" data="../semiclassical.html" style="width:100%;height:400px;"></object>
 ###```
 
-# By [Theorem 2.20](https://arxiv.org/abs/2302.08448) it turns out that the *derivatives* of these particular semi-classical Jacobi polynomials are a linear combination of at most four polynomials orthogonal with respect to $(1-x)^{\alpha+1}(1+x)^{\beta+1}(2+x)^{\gamma+1}(3+x)^{\delta+1}(5-x)^{\epsilon+1}$ on $(-1,1)$. This fact enables us to compute the banded differentiation matrix:
+# By [Theorem 2.20](https://arxiv.org/abs/2302.08448) it turns out that the *derivatives* of these particular semi-classical Jacobi polynomials are a linear combination of at most four polynomials orthogonal with respect to the weight $w^{(\alpha+1,\beta+1,\gamma+1,\delta+1,\epsilon+1)}(x)$ on $(-1,1)$. This fact enables us to compute the banded differentiation matrix:
 v = Fun(x->(2+x)^(γ+1)*(3+x)^(δ+1)*(5-x)^(ϵ+1), NormalizedJacobi(β+1, α+1))
 function threshold!(A::AbstractArray, ϵ)
     for i in eachindex(A)
@@ -61,6 +61,6 @@ function threshold!(A::AbstractArray, ϵ)
     A
 end
 P′ = plan_modifiedjac2jac(Float64, n+1, α+1, β+1, v.coefficients)
-DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(-1/2,0)}(y) = P^{(1/2,1)}(y) D_P.
+DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(y) = P^{(α+1,β+1)}(y) D_P.
 DQ = UpperTriangular(threshold!(P′\(DP*(P*I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(y) = Q̂(y) D_Q.
 UpperTriangular(DQ[1:10,1:10])

From bc0d4d43adddde9a4d3d5a14614f29d39d9e6804 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 21 Nov 2024 13:53:55 -0600
Subject: [PATCH 204/222] add GramMatrix approach

---
 examples/semiclassical.jl | 47 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/examples/semiclassical.jl b/examples/semiclassical.jl
index a19ca463..0b39f7bd 100644
--- a/examples/semiclassical.jl
+++ b/examples/semiclassical.jl
@@ -28,11 +28,11 @@ P1 = plan_jac2cheb(Float64, n+1, α, β; normjac = true)
 
 # We compute the Chebyshev series for the degree-$k\le n$ modified polynomial and its values at the Chebyshev points:
 q = k -> lmul!(P1, lmul!(P, [zeros(k); 1.0; zeros(n-k)]))
-qvals = k-> ichebyshevtransform(q(k))
+qvals = k -> ichebyshevtransform(q(k))
 
 # With the symmetric Jacobi matrix for $P_n^{(\alpha, \beta)}(x)$ and the modified plan, we may compute the modified Jacobi matrix and the corresponding roots (as eigenvalues):
 x = Fun(x->x, NormalizedJacobi(β, α))
-XP = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:n, 1:n]))
+XP = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:n+1, 1:n+1]))
 XQ = FastTransforms.modified_jacobi_matrix(P, XP)
 SymTridiagonal(XQ.dv[1:10], XQ.ev[1:9])
 
@@ -64,3 +64,46 @@ P′ = plan_modifiedjac2jac(Float64, n+1, α+1, β+1, v.coefficients)
 DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(y) = P^{(α+1,β+1)}(y) D_P.
 DQ = UpperTriangular(threshold!(P′\(DP*(P*I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(y) = Q̂(y) D_Q.
 UpperTriangular(DQ[1:10,1:10])
+
+# A faster method now exists via the `GramMatrix` architecture and its associated displacement equation. We compute `U`:
+U = Symmetric(Multiplication(u, space(u))[1:n+1, 1:n+1])
+
+# Then we form a `GramMatrix` together with the Jacobi matrix:
+G = GramMatrix(U, XP)
+
+# And compute its cholesky factorization. The upper-triangular Cholesky factor represents the connection between original Jacobi and semi-classical Jacobi as ${\bf P}^{(\alpha,\beta)}(x) = {\bf Q}(x) R$.
+R = cholesky(G).U
+
+# Every else works almost as before, including evaluation on a Chebyshev grid:
+q = k -> lmul!(P1, ldiv!(R, [zeros(k); 1.0; zeros(n-k)]))
+qvals = k -> ichebyshevtransform(q(k))
+
+# Computation of the modified Jacobi matrix:
+XQ1 = FastTransforms.modified_jacobi_matrix(R, XP)
+norm(XQ-XQ1)/norm(XQ)
+
+# Plotting:
+x = chebyshevpoints(Float64, n+1, Val(1))
+p = plot(x, qvals(0); linewidth=2.0, legend = false, xlim=(-1,1), xlabel=L"x",
+         ylabel=L"Q_n(x)", title="Semi-classical Jacobi Polynomials and Their Roots",
+         extra_plot_kwargs = KW(:include_mathjax => "cdn"))
+for k in 1:10
+    λ = eigvals(SymTridiagonal(XQ1.dv[1:k], XQ1.ev[1:k-1]))
+    plot!(x, qvals(k); linewidth=2.0, color=palette(:default)[k+1])
+    scatter!(λ, zero(λ); markersize=2.5, color=palette(:default)[k+1])
+end
+p
+savefig(joinpath(GENFIGS, "semiclassical1.html"))
+###```@raw html
+###<object type="text/html" data="../semiclassical1.html" style="width:100%;height:400px;"></object>
+###```
+
+# And banded differentiation:
+V = Symmetric(Multiplication(v, space(v))[1:n+1, 1:n+1])
+x = Fun(x->x, NormalizedJacobi(β+1, α+1))
+XP′ = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:n+1, 1:n+1]))
+G′ = GramMatrix(V, XP′)
+R′ = cholesky(G′).U
+DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(y) = P^{(α+1,β+1)}(y) D_P.
+DQ = UpperTriangular(threshold!(R′*(DP*(R\I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(y) = Q̂(y) D_Q.
+UpperTriangular(DQ[1:10,1:10])

From 0af4a9e3e48295b24b0beaa21557f34bb261b482 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Tue, 26 Nov 2024 12:12:59 -0600
Subject: [PATCH 205/222] add two new constructors for the GramMatrix based on
 modified OP moments

---
 docs/Project.toml         |  1 +
 examples/semiclassical.jl | 40 ++++++++++++++------------
 src/GramMatrix.jl         | 59 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 81 insertions(+), 19 deletions(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 969efecb..ed412282 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -3,6 +3,7 @@ ApproxFun = "28f2ccd6-bb30-5033-b560-165f7b14dc2f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FastTransforms = "057dd010-8810-581a-b7be-e3fc3b93f78c"
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
+LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
 Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
 PlotlyJS = "f0f68f2c-4968-5e81-91da-67840de0976a"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
diff --git a/examples/semiclassical.jl b/examples/semiclassical.jl
index 0b39f7bd..c8c26cc8 100644
--- a/examples/semiclassical.jl
+++ b/examples/semiclassical.jl
@@ -4,9 +4,9 @@
 # \langle f, g \rangle = \int_{-1}^1 f(x) g(x) w(x){\rm d} x,
 # ```
 # where $w(x) = w^{(\alpha,\beta,\gamma,\delta,\epsilon)}(x) = (1-x)^\alpha(1+x)^\beta(2+x)^\gamma(3+x)^\delta(5-x)^\epsilon$ is a modification of the Jacobi weight.
-# We shall use results from [this paper](https://arxiv.org/abs/2302.08448) to consider these semi-classical orthogonal polynomials as modifications of the Jacobi polynomials $P_n^{(\alpha,\beta)}(x)$.
+# We shall use results from [this paper](https://arxiv.org/abs/2302.08448) to consider these semi-classical orthogonal polynomials as modifications of the orthonormalized Jacobi polynomials $\tilde{P}_n^{(\alpha,\beta)}(x)$.
 
-using ApproxFun, FastTransforms, LinearAlgebra, Plots, LaTeXStrings
+using ApproxFun, FastTransforms, LazyArrays, LinearAlgebra, Plots, LaTeXStrings
 const GENFIGS = joinpath(pkgdir(FastTransforms), "docs/src/generated")
 !isdir(GENFIGS) && mkdir(GENFIGS)
 plotlyjs()
@@ -30,7 +30,7 @@ P1 = plan_jac2cheb(Float64, n+1, α, β; normjac = true)
 q = k -> lmul!(P1, lmul!(P, [zeros(k); 1.0; zeros(n-k)]))
 qvals = k -> ichebyshevtransform(q(k))
 
-# With the symmetric Jacobi matrix for $P_n^{(\alpha, \beta)}(x)$ and the modified plan, we may compute the modified Jacobi matrix and the corresponding roots (as eigenvalues):
+# With the symmetric Jacobi matrix for $\tilde{P}_n^{(\alpha, \beta)}(x)$ and the modified plan, we may compute the modified Jacobi matrix and the corresponding roots (as eigenvalues):
 x = Fun(x->x, NormalizedJacobi(β, α))
 XP = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:n+1, 1:n+1]))
 XQ = FastTransforms.modified_jacobi_matrix(P, XP)
@@ -61,18 +61,21 @@ function threshold!(A::AbstractArray, ϵ)
     A
 end
 P′ = plan_modifiedjac2jac(Float64, n+1, α+1, β+1, v.coefficients)
-DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(y) = P^{(α+1,β+1)}(y) D_P.
-DQ = UpperTriangular(threshold!(P′\(DP*(P*I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(y) = Q̂(y) D_Q.
-UpperTriangular(DQ[1:10,1:10])
+DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(x) = P^{(α+1,β+1)}(x) D_P.
+DQ = UpperTriangular(threshold!(P′\(DP*(P*I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(x) = Q̂(x) D_Q.
+UpperTriangular(DQ[1:10, 1:10])
 
-# A faster method now exists via the `GramMatrix` architecture and its associated displacement equation. We compute `U`:
-U = Symmetric(Multiplication(u, space(u))[1:n+1, 1:n+1])
-
-# Then we form a `GramMatrix` together with the Jacobi matrix:
-G = GramMatrix(U, XP)
+# A faster method now exists via the `GramMatrix` architecture and its associated displacement equation. Given the modified orthogonal polynomial moments implied by the normalized Jacobi series for $u(x)$, we pad this vector to the necessary size and construct the `GramMatrix` with these moments, the multiplication operator, and the constant $\tilde{P}_0^{(\alpha,\beta)}(x)$:
+μ = PaddedVector(u.coefficients, 2n+1)
+x = Fun(x->x, NormalizedJacobi(β, α))
+XP2 = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:2n+1, 1:2n+1]))
+p0 = Fun(NormalizedJacobi(β, α), [1])(0)
+G = GramMatrix(μ, XP2, p0)
+G[1:10, 1:10]
 
 # And compute its cholesky factorization. The upper-triangular Cholesky factor represents the connection between original Jacobi and semi-classical Jacobi as ${\bf P}^{(\alpha,\beta)}(x) = {\bf Q}(x) R$.
 R = cholesky(G).U
+R[1:10, 1:10]
 
 # Every else works almost as before, including evaluation on a Chebyshev grid:
 q = k -> lmul!(P1, ldiv!(R, [zeros(k); 1.0; zeros(n-k)]))
@@ -99,11 +102,12 @@ savefig(joinpath(GENFIGS, "semiclassical1.html"))
 ###```
 
 # And banded differentiation:
-V = Symmetric(Multiplication(v, space(v))[1:n+1, 1:n+1])
-x = Fun(x->x, NormalizedJacobi(β+1, α+1))
-XP′ = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:n+1, 1:n+1]))
-G′ = GramMatrix(V, XP′)
+μ′ = PaddedVector(v.coefficients, 2n+1)
+x′ = Fun(x->x, NormalizedJacobi(β+1, α+1))
+XP′ = SymTridiagonal(Symmetric(Multiplication(x′, space(x′))[1:2n+1, 1:2n+1]))
+p0′ = Fun(NormalizedJacobi(β+1, α+1), [1])(0)
+G′ = GramMatrix(μ′, XP′, p0′)
 R′ = cholesky(G′).U
-DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(y) = P^{(α+1,β+1)}(y) D_P.
-DQ = UpperTriangular(threshold!(R′*(DP*(R\I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(y) = Q̂(y) D_Q.
-UpperTriangular(DQ[1:10,1:10])
+DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(x) = P^{(α+1,β+1)}(x) D_P.
+DQ = UpperTriangular(threshold!(R′*(DP*(R\I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(x) = Q̂(x) D_Q.
+UpperTriangular(DQ[1:10, 1:10])
diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index 53210052..e7e8a70c 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -50,6 +50,64 @@ GramMatrix(W::WT, X::XT) where {T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix
 @inline bandwidths(G::GramMatrix) = bandwidths(G.W)
 @inline MemoryLayout(G::GramMatrix) = MemoryLayout(G.W)
 
+"""
+    GramMatrix(μ::AbstractVector, X::AbstractMatrix[, p0])
+
+Construct the leading `n × n` GramMatrix, `n = (N+1)÷2`, from the `N` modified orthogonal polynomial moments `μ` and the `N × N` tridiagonal multiplication operator `X`.
+The first column is `p0*μ`: in the standard (classical) normalization, ``p_0(x) = 1`` (the default `p0`), so that the moments
+``\\mu_n = \\langle p_{n-1}, 1\\rangle`` are in fact the first column of the Gram matrix.
+Each further column is built from the previous two by the recurrence implied by ``X^\\top W = WX``.
+"""
+GramMatrix(μ::AbstractVector{T}, X::XT) where {T, XT <: AbstractMatrix{T}} = GramMatrix(μ, X, one(T))
+function GramMatrix(μ::AbstractVector{T}, X::XT, p0::T) where {T, XT <: AbstractMatrix{T}}
+    N = length(μ)
+    n = (N+1)÷2
+    @assert N == size(X, 1) == size(X, 2)
+    @assert bandwidths(X) == (1, 1)
+    W = Matrix{T}(undef, N, N)
+    if n > 0
+        @inbounds for m in 1:N
+            W[m, 1] = p0*μ[m]
+        end
+    end
+    if n > 1 # second column: the general recurrence below without its column-(n-2) term
+        @inbounds for m in 2:N-1
+            W[m, 2] = (X[m-1, m]*W[m-1, 1] + (X[m, m]-X[1, 1])*W[m, 1] + X[m+1, m]*W[m+1, 1])/X[2, 1]
+        end
+    end
+    @inbounds for n in 3:n # no @simd: column n reads columns n-1 and n-2, and this is not an innermost loop
+        for m in n:N-n+1 # each column is one entry shorter at the bottom than the previous one
+            W[m, n] = (X[m-1, m]*W[m-1, n-1] + (X[m, m]-X[n-1, n-1])*W[m, n-1] + X[m+1, m]*W[m+1, n-1] - X[n-2, n-1]*W[m, n-2])/X[n, n-1]
+        end
+    end
+    return GramMatrix(Symmetric(W[1:n, 1:n], :L), eval(XT.name.name)(view(X, 1:n, 1:n))) # only the lower triangle of W was filled, hence :L
+end
+
+function GramMatrix(μ::PaddedVector{T}, X::XT, p0::T) where {T, XT <: AbstractMatrix{T}} # banded counterpart of the dense moment constructor
+    N = length(μ)
+    b = length(μ.args[2])-1 # lower bandwidth of W; presumably μ.args[2] is the unpadded data of μ — confirm against LazyArrays
+    n = (N+1)÷2 # size of the returned Gram matrix
+    @assert N == size(X, 1) == size(X, 2)
+    @assert bandwidths(X) == (1, 1)
+    W = BandedMatrix{T}(undef, (N, N), (b, 0)) # lower-banded storage: only entries with 0 ≤ m-n ≤ b are kept
+    if n > 0
+        @inbounds for m in 1:min(N, b+1)
+            W[m, 1] = p0*μ[m]
+        end
+    end
+    if n > 1 # second column: the general recurrence below without its column-(n-2) term
+        @inbounds for m in 2:min(N-1, b+2)
+            W[m, 2] = (X[m-1, m]*W[m-1, 1] + (X[m, m]-X[1, 1])*W[m, 1] + X[m+1, m]*W[m+1, 1])/X[2, 1]
+        end
+    end
+    @inbounds for n in 3:n # no @simd: column n reads columns n-1 and n-2, and this is not an innermost loop
+        for m in n:min(N-n+1, b+n) # stop at the band edge b+n or the shrinking column end, whichever comes first
+            W[m, n] = (X[m-1, m]*W[m-1, n-1] + (X[m, m]-X[n-1, n-1])*W[m, n-1] + X[m+1, m]*W[m+1, n-1] - X[n-2, n-1]*W[m, n-2])/X[n, n-1]
+        end
+    end
+    return GramMatrix(Symmetric(W[1:n, 1:n], :L), eval(XT.name.name)(view(X, 1:n, 1:n))) # only the lower triangle of W was filled, hence :L
+end
+
 #
 # X'W-W*X = G*J*G'
 # This returns G, where J = [0 1; -1 0], respecting the skew-symmetry of the right-hand side.
@@ -201,7 +259,6 @@ function compute_skew_generators(W::ChebyshevGramMatrix{T}) where T
     @inbounds @simd for j in 1:n-1
         G[j, 2] = -(μ[n+2-j] + μ[n+j])/2
     end
-    G[n, 2] = -μ[2]/2
     G
 end
 

From 368dcb994da728c622c408d3aabea085b53fe1c5 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Tue, 26 Nov 2024 12:39:23 -0600
Subject: [PATCH 206/222] switch to views

- these display the parent types, giving more information
---
 examples/semiclassical.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/semiclassical.jl b/examples/semiclassical.jl
index c8c26cc8..9acecaca 100644
--- a/examples/semiclassical.jl
+++ b/examples/semiclassical.jl
@@ -34,7 +34,7 @@ qvals = k -> ichebyshevtransform(q(k))
 x = Fun(x->x, NormalizedJacobi(β, α))
 XP = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:n+1, 1:n+1]))
 XQ = FastTransforms.modified_jacobi_matrix(P, XP)
-SymTridiagonal(XQ.dv[1:10], XQ.ev[1:9])
+view(XQ, 1:7, 1:7)
 
 # And we plot:
 x = chebyshevpoints(Float64, n+1, Val(1))
@@ -63,7 +63,7 @@ end
 P′ = plan_modifiedjac2jac(Float64, n+1, α+1, β+1, v.coefficients)
 DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(x) = P^{(α+1,β+1)}(x) D_P.
 DQ = UpperTriangular(threshold!(P′\(DP*(P*I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(x) = Q̂(x) D_Q.
-UpperTriangular(DQ[1:10, 1:10])
+UpperTriangular(DQ[1:9, 1:9])
 
 # A faster method now exists via the `GramMatrix` architecture and its associated displacement equation. Given the modified orthogonal polynomial moments implied by the normalized Jacobi series for $u(x)$, we pad this vector to the necessary size and construct the `GramMatrix` with these moments, the multiplication operator, and the constant $\tilde{P}_0^{(\alpha,\beta)}(x)$:
 μ = PaddedVector(u.coefficients, 2n+1)
@@ -71,11 +71,11 @@ x = Fun(x->x, NormalizedJacobi(β, α))
 XP2 = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:2n+1, 1:2n+1]))
 p0 = Fun(NormalizedJacobi(β, α), [1])(0)
 G = GramMatrix(μ, XP2, p0)
-G[1:10, 1:10]
+view(G, 1:7, 1:7)
 
 # And compute its cholesky factorization. The upper-triangular Cholesky factor represents the connection between original Jacobi and semi-classical Jacobi as ${\bf P}^{(\alpha,\beta)}(x) = {\bf Q}(x) R$.
 R = cholesky(G).U
-R[1:10, 1:10]
+UpperTriangular(view(R, 1:7, 1:7))
 
 # Every else works almost as before, including evaluation on a Chebyshev grid:
 q = k -> lmul!(P1, ldiv!(R, [zeros(k); 1.0; zeros(n-k)]))
@@ -110,4 +110,4 @@ G′ = GramMatrix(μ′, XP′, p0′)
 R′ = cholesky(G′).U
 DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(x) = P^{(α+1,β+1)}(x) D_P.
 DQ = UpperTriangular(threshold!(R′*(DP*(R\I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(x) = Q̂(x) D_Q.
-UpperTriangular(DQ[1:10, 1:10])
+UpperTriangular(DQ[1:9, 1:9])

From 73ebb102ec1d246f9437c85732954a99ec68e051 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Thu, 12 Dec 2024 14:35:52 -0600
Subject: [PATCH 207/222] add O(bn) GramMatrix constructor from moments

---
 Project.toml      | 2 +-
 src/GramMatrix.jl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index be1349a2..a7e938a4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.16.6"
+version = "0.16.7"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index e7e8a70c..8bf6dc42 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -64,7 +64,7 @@ function GramMatrix(μ::AbstractVector{T}, X::XT, p0::T) where {T, XT <: Abstrac
     n = (N+1)÷2
     @assert N == size(X, 1) == size(X, 2)
     @assert bandwidths(X) == (1, 1)
-    W = Matrix{T}(undef, N, N)
+    W = LowerTriangular(Matrix{T}(undef, N, N))
     if n > 0
         @inbounds for m in 1:N
             W[m, 1] = p0*μ[m]

From 0c5cc86c8feaef0f039387f7b63d13c06ef9a632 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 13 Dec 2024 08:46:21 -0600
Subject: [PATCH 208/222] delete tests

---
 test/GramMatrixtests.jl | 87 -----------------------------------------
 1 file changed, 87 deletions(-)
 delete mode 100644 test/GramMatrixtests.jl

diff --git a/test/GramMatrixtests.jl b/test/GramMatrixtests.jl
deleted file mode 100644
index 3ac9d626..00000000
--- a/test/GramMatrixtests.jl
+++ /dev/null
@@ -1,87 +0,0 @@
-using FastTransforms, BandedMatrices, LazyArrays, LinearAlgebra, Test
-
-@testset "GramMatrix" begin
-    n = 128
-    for T in (Float32, Float64, BigFloat)
-        R = plan_leg2cheb(T, n; normcheb=true)*I
-        X = Tridiagonal([T(n)/(2n-1) for n in 1:n-1], zeros(T, n), [T(n)/(2n+1) for n in 1:n-1]) # Legendre X
-        W = Symmetric(R'R)
-        G = GramMatrix(W, X)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ W
-        @test F.U ≈ R
-
-        R = plan_leg2cheb(T, n; normcheb=true, normleg=true)*I
-        X = SymTridiagonal(zeros(T, n), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n-1]) # normalized Legendre X
-        W = Symmetric(R'R)
-        G = GramMatrix(W, X)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ W
-        @test F.U ≈ R
-
-        b = 4
-        X = BandedMatrix(SymTridiagonal(zeros(T, n+b), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n+b-1])) # normalized Legendre X
-        W = I+X^2+X^4
-        W = Symmetric(W[1:n, 1:n])
-        X = BandedMatrix(SymTridiagonal(zeros(T, n), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n-1])) # normalized Legendre X
-        G = GramMatrix(W, X)
-        @test bandwidths(G) == (b, b)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ W
-
-        X = BandedMatrix(SymTridiagonal(T[2n-1 for n in 1:n+b], T[-n for n in 1:n+b-1])) # Laguerre X, tests nonzero diagonal
-        W = I+X^2+X^4
-        W = Symmetric(W[1:n, 1:n])
-        X = BandedMatrix(SymTridiagonal(T[2n-1 for n in 1:n], T[-n for n in 1:n-1])) # Laguerre X
-        G = GramMatrix(W, X)
-        @test bandwidths(G) == (b, b)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ W
-    end
-    W = reshape([i for i in 1.0:n^2], n, n)
-    X = reshape([i for i in 1.0:4n^2], 2n, 2n)
-    @test_throws "different sizes" GramMatrix(W, X)
-    X = X[1:n, 1:n]
-    @test_throws "nonsymmetric" GramMatrix(W, X)
-    @test_throws "nontridiagonal" GramMatrix(Symmetric(W), X)
-end
-
-@testset "ChebyshevGramMatrix" begin
-    n = 128
-    for T in (Float32, Float64, BigFloat)
-        μ = FastTransforms.chebyshevmoments1(T, 2n-1)
-        G = ChebyshevGramMatrix(μ)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ G
-        R = plan_cheb2leg(T, n; normleg=true)*I
-        @test F.U ≈ R
-
-        α, β = (T(0.123), T(0.456))
-        μ = FastTransforms.chebyshevjacobimoments1(T, 2n-1, α, β)
-        G = ChebyshevGramMatrix(μ)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ G
-        R = plan_cheb2jac(T, n, α, β; normjac=true)*I
-        @test F.U ≈ R
-
-        μ = -FastTransforms.chebyshevlogmoments1(T, 2n-1)
-        G = ChebyshevGramMatrix(μ)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ G
-
-        μ = FastTransforms.chebyshevabsmoments1(T, 2n-1)
-        G = ChebyshevGramMatrix(μ)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ G
-
-        μ = PaddedVector(T(1) ./ [1,2,3,4,5], 2n-1)
-        G = ChebyshevGramMatrix(μ)
-        @test bandwidths(G) == (4, 4)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ G
-        μd = Vector{T}(μ)
-        Gd = ChebyshevGramMatrix(μd)
-        Fd = cholesky(Gd)
-        @test F.L ≈ Fd.L
-    end
-end

From 5260921ef953b357e7eb9a0fd4b0567b4484ef61 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 13 Dec 2024 08:47:06 -0600
Subject: [PATCH 209/222] reinstate tests

---
 Project.toml            |  2 +-
 test/grammatrixtests.jl | 87 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 test/grammatrixtests.jl

diff --git a/Project.toml b/Project.toml
index a7e938a4..11bc8361 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.16.7"
+version = "0.16.8"
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
diff --git a/test/grammatrixtests.jl b/test/grammatrixtests.jl
new file mode 100644
index 00000000..3ac9d626
--- /dev/null
+++ b/test/grammatrixtests.jl
@@ -0,0 +1,87 @@
+using FastTransforms, BandedMatrices, LazyArrays, LinearAlgebra, Test
+
+@testset "GramMatrix" begin
+    n = 128
+    for T in (Float32, Float64, BigFloat)
+        R = plan_leg2cheb(T, n; normcheb=true)*I
+        X = Tridiagonal([T(n)/(2n-1) for n in 1:n-1], zeros(T, n), [T(n)/(2n+1) for n in 1:n-1]) # Legendre X
+        W = Symmetric(R'R)
+        G = GramMatrix(W, X)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ W
+        @test F.U ≈ R
+
+        R = plan_leg2cheb(T, n; normcheb=true, normleg=true)*I
+        X = SymTridiagonal(zeros(T, n), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n-1]) # normalized Legendre X
+        W = Symmetric(R'R)
+        G = GramMatrix(W, X)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ W
+        @test F.U ≈ R
+
+        b = 4
+        X = BandedMatrix(SymTridiagonal(zeros(T, n+b), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n+b-1])) # normalized Legendre X
+        W = I+X^2+X^4
+        W = Symmetric(W[1:n, 1:n])
+        X = BandedMatrix(SymTridiagonal(zeros(T, n), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n-1])) # normalized Legendre X
+        G = GramMatrix(W, X)
+        @test bandwidths(G) == (b, b)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ W
+
+        X = BandedMatrix(SymTridiagonal(T[2n-1 for n in 1:n+b], T[-n for n in 1:n+b-1])) # Laguerre X, tests nonzero diagonal
+        W = I+X^2+X^4
+        W = Symmetric(W[1:n, 1:n])
+        X = BandedMatrix(SymTridiagonal(T[2n-1 for n in 1:n], T[-n for n in 1:n-1])) # Laguerre X
+        G = GramMatrix(W, X)
+        @test bandwidths(G) == (b, b)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ W
+    end
+    W = reshape([i for i in 1.0:n^2], n, n)
+    X = reshape([i for i in 1.0:4n^2], 2n, 2n)
+    @test_throws "different sizes" GramMatrix(W, X)
+    X = X[1:n, 1:n]
+    @test_throws "nonsymmetric" GramMatrix(W, X)
+    @test_throws "nontridiagonal" GramMatrix(Symmetric(W), X)
+end
+
+@testset "ChebyshevGramMatrix" begin
+    n = 128
+    for T in (Float32, Float64, BigFloat)
+        μ = FastTransforms.chebyshevmoments1(T, 2n-1)
+        G = ChebyshevGramMatrix(μ)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ G
+        R = plan_cheb2leg(T, n; normleg=true)*I
+        @test F.U ≈ R
+
+        α, β = (T(0.123), T(0.456))
+        μ = FastTransforms.chebyshevjacobimoments1(T, 2n-1, α, β)
+        G = ChebyshevGramMatrix(μ)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ G
+        R = plan_cheb2jac(T, n, α, β; normjac=true)*I
+        @test F.U ≈ R
+
+        μ = -FastTransforms.chebyshevlogmoments1(T, 2n-1)
+        G = ChebyshevGramMatrix(μ)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ G
+
+        μ = FastTransforms.chebyshevabsmoments1(T, 2n-1)
+        G = ChebyshevGramMatrix(μ)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ G
+
+        μ = PaddedVector(T(1) ./ [1,2,3,4,5], 2n-1)
+        G = ChebyshevGramMatrix(μ)
+        @test bandwidths(G) == (4, 4)
+        F = cholesky(G)
+        @test F.L*F.L' ≈ G
+        μd = Vector{T}(μ)
+        Gd = ChebyshevGramMatrix(μd)
+        Fd = cholesky(Gd)
+        @test F.L ≈ Fd.L
+    end
+end

From a40b3dc339f8cceaff5a956814079c44349c8124 Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Thu, 9 Jan 2025 11:55:35 +0000
Subject: [PATCH 210/222] Change clenshaw!/horner! to mutate first argument
 (#255)

* Change clenshaw!/horner! to mutate first argument

* Update libfasttransformstests.jl

* Update Project.toml

* Update make.jl

* Update sphere.jl

* Update make.jl

* Move semiclassical.jl to ApproxFunExamples
---
 Project.toml                   |   7 +-
 docs/Project.toml              |   1 -
 docs/make.jl                   |   2 -
 examples/semiclassical.jl      | 113 ---------------------------------
 examples/sphere.jl             |   6 +-
 src/FastTransforms.jl          |  13 ++--
 src/libfasttransforms.jl       |  12 ++--
 test/libfasttransformstests.jl |   6 +-
 8 files changed, 20 insertions(+), 140 deletions(-)
 delete mode 100644 examples/semiclassical.jl

diff --git a/Project.toml b/Project.toml
index 11bc8361..2959561e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,7 @@
 name = "FastTransforms"
 uuid = "057dd010-8810-581a-b7be-e3fc3b93f78c"
-version = "0.16.8"
+version = "0.17"
+
 
 [deps]
 AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
@@ -15,7 +16,6 @@ LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 RecurrenceRelationships = "807425ed-42ea-44d6-a357-6771516d7b2c"
-Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 ToeplitzMatrices = "c751599d-da0a-543b-9d20-d0a503d91d24"
 
@@ -29,8 +29,7 @@ FastTransforms_jll = "0.6.2"
 FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1"
 GenericFFT = "0.1"
 LazyArrays = "2.2"
-RecurrenceRelationships = "0.1"
-Reexport = "0.2, 1.0"
+RecurrenceRelationships = "0.2"
 SpecialFunctions = "0.10, 1, 2"
 ToeplitzMatrices = "0.7.1, 0.8"
 julia = "1.7"
diff --git a/docs/Project.toml b/docs/Project.toml
index ed412282..169ffdb7 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,5 +1,4 @@
 [deps]
-ApproxFun = "28f2ccd6-bb30-5033-b560-165f7b14dc2f"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 FastTransforms = "057dd010-8810-581a-b7be-e3fc3b93f78c"
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
diff --git a/docs/make.jl b/docs/make.jl
index b6a44b03..dd530e8f 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -13,7 +13,6 @@ examples = [
     "halfrange.jl",
     "nonlocaldiffusion.jl",
     "padua.jl",
-    "semiclassical.jl",
     "sphere.jl",
     "spinweighted.jl",
     "subspaceangles.jl",
@@ -48,7 +47,6 @@ makedocs(
                         "generated/halfrange.md",
                         "generated/nonlocaldiffusion.md",
                         "generated/padua.md",
-                        "generated/semiclassical.md",
                         "generated/sphere.md",
                         "generated/spinweighted.md",
                         "generated/subspaceangles.md",
diff --git a/examples/semiclassical.jl b/examples/semiclassical.jl
deleted file mode 100644
index 9acecaca..00000000
--- a/examples/semiclassical.jl
+++ /dev/null
@@ -1,113 +0,0 @@
-# # Semi-classical Jacobi polynomials
-# In this example, we will consider the semi-classical orthogonal polynomials with respect to the inner product:
-# ```math
-# \langle f, g \rangle = \int_{-1}^1 f(x) g(x) w(x){\rm d} x,
-# ```
-# where $w(x) = w^{(\alpha,\beta,\gamma,\delta,\epsilon)}(x) = (1-x)^\alpha(1+x)^\beta(2+x)^\gamma(3+x)^\delta(5-x)^\epsilon$ is a modification of the Jacobi weight.
-# We shall use results from [this paper](https://arxiv.org/abs/2302.08448) to consider these semi-classical orthogonal polynomials as modifications of the orthonormalized Jacobi polynomials $\tilde{P}_n^{(\alpha,\beta)}(x)$.
-
-using ApproxFun, FastTransforms, LazyArrays, LinearAlgebra, Plots, LaTeXStrings
-const GENFIGS = joinpath(pkgdir(FastTransforms), "docs/src/generated")
-!isdir(GENFIGS) && mkdir(GENFIGS)
-plotlyjs()
-
-# We set the five parameters:
-α,β,γ,δ,ϵ = -0.125, -0.25, 0.123, 0.456, 0.789
-
-# We use `ApproxFun` to construct a finite normalized Jacobi series as a proxy for $(2+x)^\gamma(3+x)^\delta(5-x)^\epsilon$.
-u = Fun(x->(2+x)^γ*(3+x)^δ*(5-x)^ϵ, NormalizedJacobi(β, α))
-
-# Our working polynomial degree will be:
-n = 100
-
-# We compute the connection coefficients between the modified orthogonal polynomials and the Jacobi polynomials:
-P = plan_modifiedjac2jac(Float64, n+1, α, β, u.coefficients)
-
-# We store the connection to first kind Chebyshev polynomials:
-P1 = plan_jac2cheb(Float64, n+1, α, β; normjac = true)
-
-# We compute the Chebyshev series for the degree-$k\le n$ modified polynomial and its values at the Chebyshev points:
-q = k -> lmul!(P1, lmul!(P, [zeros(k); 1.0; zeros(n-k)]))
-qvals = k -> ichebyshevtransform(q(k))
-
-# With the symmetric Jacobi matrix for $\tilde{P}_n^{(\alpha, \beta)}(x)$ and the modified plan, we may compute the modified Jacobi matrix and the corresponding roots (as eigenvalues):
-x = Fun(x->x, NormalizedJacobi(β, α))
-XP = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:n+1, 1:n+1]))
-XQ = FastTransforms.modified_jacobi_matrix(P, XP)
-view(XQ, 1:7, 1:7)
-
-# And we plot:
-x = chebyshevpoints(Float64, n+1, Val(1))
-p = plot(x, qvals(0); linewidth=2.0, legend = false, xlim=(-1,1), xlabel=L"x",
-         ylabel=L"Q_n(x)", title="Semi-classical Jacobi Polynomials and Their Roots",
-         extra_plot_kwargs = KW(:include_mathjax => "cdn"))
-for k in 1:10
-    λ = eigvals(SymTridiagonal(XQ.dv[1:k], XQ.ev[1:k-1]))
-    plot!(x, qvals(k); linewidth=2.0, color=palette(:default)[k+1])
-    scatter!(λ, zero(λ); markersize=2.5, color=palette(:default)[k+1])
-end
-p
-savefig(joinpath(GENFIGS, "semiclassical.html"))
-###```@raw html
-###<object type="text/html" data="../semiclassical.html" style="width:100%;height:400px;"></object>
-###```
-
-# By [Theorem 2.20](https://arxiv.org/abs/2302.08448) it turns out that the *derivatives* of these particular semi-classical Jacobi polynomials are a linear combination of at most four polynomials orthogonal with respect to the weight $w^{(\alpha+1,\beta+1,\gamma+1,\delta+1,\epsilon+1)}(x)$ on $(-1,1)$. This fact enables us to compute the banded differentiation matrix:
-v = Fun(x->(2+x)^(γ+1)*(3+x)^(δ+1)*(5-x)^(ϵ+1), NormalizedJacobi(β+1, α+1))
-function threshold!(A::AbstractArray, ϵ)
-    for i in eachindex(A)
-        if abs(A[i]) < ϵ A[i] = 0 end
-    end
-    A
-end
-P′ = plan_modifiedjac2jac(Float64, n+1, α+1, β+1, v.coefficients)
-DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(x) = P^{(α+1,β+1)}(x) D_P.
-DQ = UpperTriangular(threshold!(P′\(DP*(P*I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(x) = Q̂(x) D_Q.
-UpperTriangular(DQ[1:9, 1:9])
-
-# A faster method now exists via the `GramMatrix` architecture and its associated displacement equation. Given the modified orthogonal polynomial moments implied by the normalized Jacobi series for $u(x)$, we pad this vector to the necessary size and construct the `GramMatrix` with these moments, the multiplication operator, and the constant $\tilde{P}_0^{(\alpha,\beta)}(x)$:
-μ = PaddedVector(u.coefficients, 2n+1)
-x = Fun(x->x, NormalizedJacobi(β, α))
-XP2 = SymTridiagonal(Symmetric(Multiplication(x, space(x))[1:2n+1, 1:2n+1]))
-p0 = Fun(NormalizedJacobi(β, α), [1])(0)
-G = GramMatrix(μ, XP2, p0)
-view(G, 1:7, 1:7)
-
-# And compute its cholesky factorization. The upper-triangular Cholesky factor represents the connection between original Jacobi and semi-classical Jacobi as ${\bf P}^{(\alpha,\beta)}(x) = {\bf Q}(x) R$.
-R = cholesky(G).U
-UpperTriangular(view(R, 1:7, 1:7))
-
-# Every else works almost as before, including evaluation on a Chebyshev grid:
-q = k -> lmul!(P1, ldiv!(R, [zeros(k); 1.0; zeros(n-k)]))
-qvals = k -> ichebyshevtransform(q(k))
-
-# Computation of the modified Jacobi matrix:
-XQ1 = FastTransforms.modified_jacobi_matrix(R, XP)
-norm(XQ-XQ1)/norm(XQ)
-
-# Plotting:
-x = chebyshevpoints(Float64, n+1, Val(1))
-p = plot(x, qvals(0); linewidth=2.0, legend = false, xlim=(-1,1), xlabel=L"x",
-         ylabel=L"Q_n(x)", title="Semi-classical Jacobi Polynomials and Their Roots",
-         extra_plot_kwargs = KW(:include_mathjax => "cdn"))
-for k in 1:10
-    λ = eigvals(SymTridiagonal(XQ1.dv[1:k], XQ1.ev[1:k-1]))
-    plot!(x, qvals(k); linewidth=2.0, color=palette(:default)[k+1])
-    scatter!(λ, zero(λ); markersize=2.5, color=palette(:default)[k+1])
-end
-p
-savefig(joinpath(GENFIGS, "semiclassical1.html"))
-###```@raw html
-###<object type="text/html" data="../semiclassical1.html" style="width:100%;height:400px;"></object>
-###```
-
-# And banded differentiation:
-μ′ = PaddedVector(v.coefficients, 2n+1)
-x′ = Fun(x->x, NormalizedJacobi(β+1, α+1))
-XP′ = SymTridiagonal(Symmetric(Multiplication(x′, space(x′))[1:2n+1, 1:2n+1]))
-p0′ = Fun(NormalizedJacobi(β+1, α+1), [1])(0)
-G′ = GramMatrix(μ′, XP′, p0′)
-R′ = cholesky(G′).U
-DP = UpperTriangular(diagm(1=>[sqrt(n*(n+α+β+1)) for n in 1:n])) # The classical differentiation matrix representing 𝒟 P^{(α,β)}(x) = P^{(α+1,β+1)}(x) D_P.
-DQ = UpperTriangular(threshold!(R′*(DP*(R\I)), 100eps())) # The semi-classical differentiation matrix representing 𝒟 Q(x) = Q̂(x) D_Q.
-UpperTriangular(DQ[1:9, 1:9])
diff --git a/examples/sphere.jl b/examples/sphere.jl
index 916975b5..a28a36a2 100644
--- a/examples/sphere.jl
+++ b/examples/sphere.jl
@@ -61,7 +61,7 @@ C = [k/(k+1) for k in 0:N]
 c = zeros(N); c[N] = 1
 pts = vec([z(θ, φ)⋅y for θ in θ, φ in φ])
 phi0 = ones(N*M)
-F = reshape(FastTransforms.clenshaw!(c, A, B, C, pts, phi0, zeros(N*M)), N, M)
+F = reshape(FastTransforms.clenshaw!(zeros(N*M), c, A, B, C, pts, phi0), N, M)
 
 # We superpose a surface plot of $f$ on top of the grid:
 X = [sinpi(θ)*cospi(φ) for θ in θ, φ in φ]
@@ -91,7 +91,7 @@ U = threshold!(P\V, 400*eps())
 nrm1 = norm(U)
 
 # Similarly, on the tensor product grid, our function samples are:
-Pnxy = FastTransforms.clenshaw!(c, A, B, C, [x⋅y], [1.0], [0.0])[1]
+Pnxy = FastTransforms.clenshaw!([0.0], c, A, B, C, [x⋅y], [1.0])[1]
 F = [(F[n, m] - Pnxy)/(z(θ[n], φ[m])⋅y - x⋅y) for n in 1:N, m in 1:M]
 
 # We superpose a surface plot of $f$ on top of the grid:
@@ -108,7 +108,7 @@ U = threshold!(P\V, 400*eps())
 
 # Finally, the Legendre polynomial $P_n(z\cdot x)$ is aligned with the grid:
 pts = vec([z(θ, φ)⋅x for θ in θ, φ in φ])
-F = reshape(FastTransforms.clenshaw!(c, A, B, C, pts, phi0, zeros(N*M)), N, M)
+F = reshape(FastTransforms.clenshaw!(zeros(N*M), c, A, B, C, pts, phi0), N, M)
 
 # We superpose a surface plot of $f$ on top of the grid:
 scatter3d(vec(X), vec(Y), vec(Z); markersize=1.25, markercolor=:violetred)
diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 8a3d5a3c..4eef13ad 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -1,11 +1,11 @@
 module FastTransforms
 
 using ArrayLayouts, BandedMatrices, FastGaussQuadrature, FillArrays, LazyArrays, LinearAlgebra,
-      Reexport, SpecialFunctions, ToeplitzMatrices, RecurrenceRelationships
+      SpecialFunctions, ToeplitzMatrices, RecurrenceRelationships
 
-@reexport using AbstractFFTs
-@reexport using FFTW
-@reexport using GenericFFT
+using AbstractFFTs
+using FFTW
+using GenericFFT
 
 import Base: convert, unsafe_convert, eltype, ndims, adjoint, transpose, show,
              *, \, inv, length, size, view, getindex, tail, OneTo
@@ -34,11 +34,8 @@ import LinearAlgebra: cholesky, issymmetric, isposdef, mul!, lmul!, ldiv!
 
 import GenericFFT: interlace # imported in downstream packages
 
-import RecurrenceRelationships: clenshaw!, check_clenshaw_recurrences
+import RecurrenceRelationships: check_clenshaw_recurrences
 
-const _forwardrecurrence! = RecurrenceRelationships.forwardrecurrence!
-const _clenshaw_next = RecurrenceRelationships.clenshaw_next
-const _forwardrecurrence_next = RecurrenceRelationships.forwardrecurrence_next
 
 export leg2cheb, cheb2leg, ultra2ultra, jac2jac,
        lag2lag, jac2ultra, ultra2jac, jac2cheb,
diff --git a/src/libfasttransforms.jl b/src/libfasttransforms.jl
index d89f0490..3ce492d9 100644
--- a/src/libfasttransforms.jl
+++ b/src/libfasttransforms.jl
@@ -49,13 +49,13 @@ function renew!(x::AbstractArray{BigFloat})
     return x
 end
 
-function horner!(c::StridedVector{Float64}, x::Vector{Float64}, f::Vector{Float64})
+function horner!(f::Vector{Float64}, c::StridedVector{Float64}, x::Vector{Float64})
     @assert length(x) == length(f)
     ccall((:ft_horner, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, stride(c, 1), length(x), x, f)
     f
 end
 
-function horner!(c::StridedVector{Float32}, x::Vector{Float32}, f::Vector{Float32})
+function horner!(f::Vector{Float32}, c::StridedVector{Float32}, x::Vector{Float32})
     @assert length(x) == length(f)
     ccall((:ft_hornerf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, stride(c, 1), length(x), x, f)
     f
@@ -69,19 +69,19 @@ function check_clenshaw_points(x, f)
     length(x) == length(f) || throw(ArgumentError("Dimensions must match"))
 end
 
-function clenshaw!(c::StridedVector{Float64}, x::Vector{Float64}, f::Vector{Float64})
+function clenshaw!(f::Vector{Float64}, c::StridedVector{Float64}, x::Vector{Float64})
     @boundscheck check_clenshaw_points(x, f)
     ccall((:ft_clenshaw, libfasttransforms), Cvoid, (Cint, Ptr{Float64}, Cint, Cint, Ptr{Float64}, Ptr{Float64}), length(c), c, stride(c, 1), length(x), x, f)
     f
 end
 
-function clenshaw!(c::StridedVector{Float32}, x::Vector{Float32}, f::Vector{Float32})
+function clenshaw!(f::Vector{Float32}, c::StridedVector{Float32}, x::Vector{Float32})
     @boundscheck check_clenshaw_points(x, f)
     ccall((:ft_clenshawf, libfasttransforms), Cvoid, (Cint, Ptr{Float32}, Cint, Cint, Ptr{Float32}, Ptr{Float32}), length(c), c, stride(c, 1), length(x), x, f)
     f
 end
 
-function clenshaw!(c::StridedVector{Float64}, A::Vector{Float64}, B::Vector{Float64}, C::Vector{Float64}, x::Vector{Float64}, ϕ₀::Vector{Float64}, f::Vector{Float64})
+function clenshaw!(f::Vector{Float64}, c::StridedVector{Float64}, A::Vector{Float64}, B::Vector{Float64}, C::Vector{Float64}, x::Vector{Float64}, ϕ₀::Vector{Float64})
     N = length(c)
     @boundscheck check_clenshaw_recurrences(N, A, B, C)
     @boundscheck check_clenshaw_points(x, ϕ₀, f)
@@ -89,7 +89,7 @@ function clenshaw!(c::StridedVector{Float64}, A::Vector{Float64}, B::Vector{Floa
     f
 end
 
-function clenshaw!(c::StridedVector{Float32}, A::Vector{Float32}, B::Vector{Float32}, C::Vector{Float32}, x::Vector{Float32}, ϕ₀::Vector{Float32}, f::Vector{Float32})
+function clenshaw!(f::Vector{Float32}, c::StridedVector{Float32}, A::Vector{Float32}, B::Vector{Float32}, C::Vector{Float32}, x::Vector{Float32}, ϕ₀::Vector{Float32})
     N = length(c)
     @boundscheck check_clenshaw_recurrences(N, A, B, C)
     @boundscheck check_clenshaw_points(x, ϕ₀, f)
diff --git a/test/libfasttransformstests.jl b/test/libfasttransformstests.jl
index de9d5f78..545da7d9 100644
--- a/test/libfasttransformstests.jl
+++ b/test/libfasttransformstests.jl
@@ -8,10 +8,10 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         c = one(T) ./ (1:n)
         x = collect(-1 .+ 2*(0:n-1)/T(n))
         f = similar(x)
-        @test FastTransforms.horner!(c, x, f) == f
+        @test FastTransforms.horner!(f, c, x) == f
         fd = T[sum(c[k]*x^(k-1) for k in 1:length(c)) for x in x]
         @test f ≈ fd
-        @test FastTransforms.clenshaw!(c, x, f) == f
+        @test FastTransforms.clenshaw!(f, c, x) == f
         fd = T[sum(c[k]*cos((k-1)*acos(x)) for k in 1:length(c)) for x in x]
         @test f ≈ fd
         A = T[(2k+one(T))/(k+one(T)) for k in 0:length(c)-1]
@@ -19,7 +19,7 @@ FastTransforms.ft_set_num_threads(ceil(Int, Base.Sys.CPU_THREADS/2))
         C = T[k/(k+one(T)) for k in 0:length(c)]
         phi0 = ones(T, length(x))
         c = FastTransforms.lib_cheb2leg(c)
-        @test FastTransforms.clenshaw!(c, A, B, C, x, phi0, f) == f
+        @test FastTransforms.clenshaw!(f, c, A, B, C, x, phi0) == f
         @test f ≈ fd
     end
 

From 39c65a2f7bd7b443b58c02d563daa93af72610af Mon Sep 17 00:00:00 2001
From: Sheehan Olver <solver@mac.com>
Date: Wed, 22 Jan 2025 15:17:01 +0000
Subject: [PATCH 211/222] Make GramMatrix docs unicode (#258)

* Make GramMatrix docs unicode

* Update GramMatrix.jl

* Update GramMatrix.jl
---
 src/GramMatrix.jl | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index 8bf6dc42..d677eab5 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -8,23 +8,23 @@ abstract type AbstractGramMatrix{T} <: LayoutMatrix{T} end
     GramMatrix(W::AbstractMatrix, X::AbstractMatrix)
 
 Construct a symmetric positive-definite Gram matrix with data stored in ``W``.
-Given a family of orthogonal polynomials ``{\\bf P}(x) = \\{p_0(x), p_1(x),\\ldots\\}``
-and a continuous inner product ``\\langle f, g\\rangle``, the Gram matrix is defined by:
+Given a family of orthogonal polynomials ``𝐏(x) = {p₀(x), p₁(x),…}``
+and a continuous inner product ``⟨f, g⟩``, the Gram matrix is defined by:
 ```math
-W_{i,j} = \\langle p_{i-1}, p_{j-1}\\rangle.
+Wᵢⱼ = ⟨pᵢ₋₁, pⱼ₋₁⟩.
 ```
-Moreover, given ``X``, the transposed Jacobi matrix that satisfies ``x {\\bf P}(x) = {\\bf P}(x) X``,
-the Gram matrix satisfies the skew-symmetric rank-2 displacement equation (``X = X_{1:n, 1:n}``):
+Moreover, given ``X``, the transposed Jacobi matrix that satisfies ``x 𝐏(x) = 𝐏(x) X``,
+the Gram matrix satisfies the skew-symmetric rank-2 displacement equation (``X = X[1:n, 1:n]``):
 ```math
-X^\\top W - WX = GJG^\\top,
+XᵀW - WX = GJGᵀ,
 ```
-where ``J = \\begin{pmatrix} 0 & 1\\\\ -1 & 0\\end{pmatrix}`` and where:
+where ``J = [0 1; -1 0]`` and where:
 ```math
-G_{:, 1} = e_n,\\quad{\\rm and}\\quad G_{:, 2} = W_{n-1, :}X_{n-1, n} - X^\\top W_{:, n}.
+G[:, 1] = 𝐞_n, G_{:, 2} = W[n-1, :]X[n-1, n] - Xᵀ W[:, n].
 ```
 Fast (``O(n^2)``) Cholesky factorization of the Gram matrix returns the
-connection coefficients between ``{\\bf P}(x)`` and the polynomials ``{\\bf Q}(x)``
-orthogonal in the modified inner product, ``{\\bf P}(x) = {\\bf Q}(x) R``.
+connection coefficients between ``𝐏(x)`` and the polynomials ``𝐐(x)``
+orthogonal in the modified inner product, ``𝐏(x) = 𝐐(x) R``.
 """
 struct GramMatrix{T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix{T}} <: AbstractGramMatrix{T}
     W::WT
@@ -55,8 +55,8 @@ GramMatrix(W::WT, X::XT) where {T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix
 
 Construct a GramMatrix from modified orthogonal polynomial moments and the multiplication operator.
 In the standard (classical) normalization, ``p_0(x) = 1``, so that the moments
-``\\mu_n = \\langle p_{n-1}, 1\\rangle`` are in fact the first column of the Gram matrix.
-The recurrence is built from ``X^\\top W = WX``.
+``µ_n = ⟨ p_{n-1}, 1⟩`` are in fact the first column of the Gram matrix.
+The recurrence is built from ``XᵀW = WX``.
 """
 GramMatrix(μ::AbstractVector{T}, X::XT) where {T, XT <: AbstractMatrix{T}} = GramMatrix(μ, X, one(T))
 function GramMatrix(μ::AbstractVector{T}, X::XT, p0::T) where {T, XT <: AbstractMatrix{T}}
@@ -221,12 +221,12 @@ end
 
 Construct a Chebyshev--Gram matrix of size `(length(μ)+1)÷2` with entries:
 ```math
-W_{i,j} = \\frac{\\mu_{|i-j|+1} +\\mu_{i+j-1}}{2}.
+W_{i,j} = \\frac{µ_{|i-j|+1} +µ_{i+j-1}}{2}.
 ```
 Due to the linearization of a product of two first-kind Chebyshev polynomials,
 the Chebyshev--Gram matrix can be constructed from modified Chebyshev moments:
 ```math
-\\mu_{n} = \\langle T_{n-1}, 1\\rangle.
+µ_{n} = ⟨ T_{n-1}, 1⟩.
 ```
 Specialized construction and Cholesky factorization is given for this type.
 

From 976185097a9dc2228b601122f638133c44932d84 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 22 Jan 2025 09:48:31 -0600
Subject: [PATCH 212/222] add more modified Chebyshev moments

change log-chebyshev sign
---
 src/specialfunctions.jl | 54 ++++++++++++++++++++++++++++++++++++++---
 test/grammatrixtests.jl |  2 +-
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/src/specialfunctions.jl b/src/specialfunctions.jl
index b5915ab7..93f818e9 100644
--- a/src/specialfunctions.jl
+++ b/src/specialfunctions.jl
@@ -268,6 +268,34 @@ function chebyshevmoments1(::Type{T}, N::Int) where T
     μ
 end
 
+"""
+Modified Chebyshev moments of the first kind:
+
+```math
+    \\int^a T_n(x) {\\rm\\,d}x.
+```
+"""
+function chebyshevmoments1(::Type{T}, N::Int, a::T) where T
+    μ = zeros(T, N)
+    N > 0 && (μ[1] = a) # n = 0 term; guarded so that N = 0 returns an empty vector
+    N > 1 && (μ[2] = a^2/2) # n = 1 term; guarded so that N = 1 does not index out of bounds
+    θ = acos(a) # a = cos(θ), so that T_n(a) = cos(nθ) below
+    for i = 2:N-1
+        @inbounds μ[i+1] = (cos((i+1)*θ)/(i+1) - cos((i-1)*θ)/(i-1))/2 # (T_{i+1}(a)/(i+1) - T_{i-1}(a)/(i-1))/2
+    end
+    μ
+end
+
+function chebyshevmoments1(::Type{T}, N::Int, a::NTuple{L, T}, w::NTuple{M, T}) where {T, L, M}
+    @assert L == M+1
+    @assert M > 0
+    μ = zeros(T, N)
+    for k in 1:M
+        μ .+= w[k]*(chebyshevmoments1(T, N, a[k+1]) - chebyshevmoments1(T, N, a[k]))
+    end
+    μ
+end
+
 """
 Modified Chebyshev moments of the first kind with respect to the Jacobi weight:
 
@@ -291,22 +319,40 @@ end
 Modified Chebyshev moments of the first kind with respect to the logarithmic weight:
 
 ```math
-    \\int_{-1}^{+1} T_n(x) \\log\\left(\\frac{1-x}{2}\\right){\\rm\\,d}x.
+    \\int_{-1}^{+1} T_n(x) \\log\\left(\\frac{2}{1-x}\\right){\\rm\\,d}x.
 ```
 """
 function chebyshevlogmoments1(::Type{T}, N::Int) where T
     μ = zeros(T, N)
-    N > 0 && (μ[1] = -two(T))
+    N > 0 && (μ[1] = two(T))
     if N > 1
-        μ[2] = -one(T)
+        μ[2] = one(T)
         for i=1:N-2
-            cst = isodd(i) ? T(4)/T(i^2-4) : T(4)/T(i^2-1)
+            cst = isodd(i) ? T(4)/T(4-i^2) : T(4)/T(1-i^2)
             @inbounds μ[i+2] = ((i-2)*μ[i]+cst)/(i+2)
         end
     end
     μ
 end
 
+"""
+Modified Chebyshev moments of the first kind with respect to the log-Chebyshev weight:
+
+```math
+    \\int_{-1}^{+1} T_n(x) \\log\\left(\\frac{2}{1-x}\\right)\\frac{{\\rm d}x}{\\sqrt{1-x^2}}.
+```
+"""
+function chebyshevlogchebyshevmoments1(::Type{T}, N::Int) where T
+    μ = zeros(T, N)
+    N > 0 && (μ[1] = 2*log(T(2))*π)
+    if N > 1
+        for i=1:N-1
+            @inbounds μ[i+1] = T(π)/i
+        end
+    end
+    μ
+end
+
 """
 Modified Chebyshev moments of the first kind with respect to the absolute value weight:
 
diff --git a/test/grammatrixtests.jl b/test/grammatrixtests.jl
index 3ac9d626..6cba6b50 100644
--- a/test/grammatrixtests.jl
+++ b/test/grammatrixtests.jl
@@ -64,7 +64,7 @@ end
         R = plan_cheb2jac(T, n, α, β; normjac=true)*I
         @test F.U ≈ R
 
-        μ = -FastTransforms.chebyshevlogmoments1(T, 2n-1)
+        μ = FastTransforms.chebyshevlogmoments1(T, 2n-1)
         G = ChebyshevGramMatrix(μ)
         F = cholesky(G)
         @test F.L*F.L' ≈ G

From 7886a2bc831f695815deb6bde7aa85049ed4e0b9 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 22 Jan 2025 09:54:37 -0600
Subject: [PATCH 213/222] fix failed quadrature tests

---
 test/quadraturetests.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/quadraturetests.jl b/test/quadraturetests.jl
index 225c7275..bceba48d 100644
--- a/test/quadraturetests.jl
+++ b/test/quadraturetests.jl
@@ -19,7 +19,7 @@ import FastTransforms: chebyshevmoments1, chebyshevmoments2,
 
     μ = chebyshevlogmoments1(Float64, N)
     w = clenshawcurtisweights(μ)
-    @test norm(sum(w./(x .- 3)) - π^2/12) ≤ 4eps()
+    @test norm(sum(w./(3 .- x)) - π^2/12) ≤ 4eps()
 
     x = fejernodes1(Float64, N)
     μ = chebyshevmoments1(Float64, N)
@@ -32,7 +32,7 @@ import FastTransforms: chebyshevmoments1, chebyshevmoments2,
 
     μ = chebyshevlogmoments1(Float64, N)
     w = fejerweights1(μ)
-    @test norm(sum(w./(x .- 3)) - π^2/12) ≤ 4eps()
+    @test norm(sum(w./(3 .- x)) - π^2/12) ≤ 4eps()
 
     x = fejernodes2(Float64, N)
     μ = chebyshevmoments2(Float64, N)
@@ -45,5 +45,5 @@ import FastTransforms: chebyshevmoments1, chebyshevmoments2,
 
     μ = chebyshevlogmoments2(Float64, N)
     w = fejerweights2(μ)
-    @test norm(sum(w./(x .- 3)) - π^2/12) ≤ 4eps()
+    @test norm(sum(w./(3 .- x)) - π^2/12) ≤ 4eps()
 end

From d1c64a6a8d5f19c6c503b24f73b96a3eb364d172 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 22 Jan 2025 10:01:03 -0600
Subject: [PATCH 214/222] some more doc fixes

---
 src/GramMatrix.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index d677eab5..344c9bb0 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -11,7 +11,7 @@ Construct a symmetric positive-definite Gram matrix with data stored in ``W``.
 Given a family of orthogonal polynomials ``𝐏(x) = {p₀(x), p₁(x),…}``
 and a continuous inner product ``⟨f, g⟩``, the Gram matrix is defined by:
 ```math
-Wᵢⱼ = ⟨pᵢ₋₁, pⱼ₋₁⟩.
+W[i, j] = ⟨pᵢ₋₁, pⱼ₋₁⟩.
 ```
 Moreover, given ``X``, the transposed Jacobi matrix that satisfies ``x 𝐏(x) = 𝐏(x) X``,
 the Gram matrix satisfies the skew-symmetric rank-2 displacement equation (``X = X[1:n, 1:n]``):
@@ -20,7 +20,7 @@ XᵀW - WX = GJGᵀ,
 ```
 where ``J = [0 1; -1 0]`` and where:
 ```math
-G[:, 1] = 𝐞_n, G_{:, 2} = W[n-1, :]X[n-1, n] - Xᵀ W[:, n].
+G[:, 1] = 𝐞ₙ, \\quad  G[:, 2] = W[n-1, :]X[n-1, n] - Xᵀ W[:, n].
 ```
 Fast (``O(n^2)``) Cholesky factorization of the Gram matrix returns the
 connection coefficients between ``𝐏(x)`` and the polynomials ``𝐐(x)``
@@ -54,8 +54,8 @@ GramMatrix(W::WT, X::XT) where {T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix
     GramMatrix(μ::AbstractVector, X::AbstractMatrix)
 
 Construct a GramMatrix from modified orthogonal polynomial moments and the multiplication operator.
-In the standard (classical) normalization, ``p_0(x) = 1``, so that the moments
-``µ_n = ⟨ p_{n-1}, 1⟩`` are in fact the first column of the Gram matrix.
+In the standard (classical) normalization, ``p₀(x) = 1``, so that the moments
+``µ[n] = ⟨ pₙ₋₁, 1⟩`` are in fact the first column of the Gram matrix.
 The recurrence is built from ``XᵀW = WX``.
 """
 GramMatrix(μ::AbstractVector{T}, X::XT) where {T, XT <: AbstractMatrix{T}} = GramMatrix(μ, X, one(T))
@@ -221,12 +221,12 @@ end
 
 Construct a Chebyshev--Gram matrix of size `(length(μ)+1)÷2` with entries:
 ```math
-W_{i,j} = \\frac{µ_{|i-j|+1} +µ_{i+j-1}}{2}.
+2 W[i, j] = µ_{|i-j|+1} + µ_{i+j-1}.
 ```
 Due to the linearization of a product of two first-kind Chebyshev polynomials,
 the Chebyshev--Gram matrix can be constructed from modified Chebyshev moments:
 ```math
-µ_{n} = ⟨ T_{n-1}, 1⟩.
+µ[n] = ⟨ Tₙ₋₁, 1⟩.
 ```
 Specialized construction and Cholesky factorization is given for this type.
 

From 9f7b25374ccd4fcef8e45611f7696ff92cb32055 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 22 Jan 2025 10:05:19 -0600
Subject: [PATCH 215/222] fix doc for 2nd kind log moment

---
 src/specialfunctions.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/specialfunctions.jl b/src/specialfunctions.jl
index 93f818e9..ba6e0ea1 100644
--- a/src/specialfunctions.jl
+++ b/src/specialfunctions.jl
@@ -408,7 +408,7 @@ end
 Modified Chebyshev moments of the second kind with respect to the logarithmic weight:
 
 ```math
-    \\int_{-1}^{+1} U_n(x) \\log\\left(\\frac{1-x}{2}\\right){\\rm\\,d}x.
+    \\int_{-1}^{+1} U_n(x) \\log\\left(\\frac{2}{1-x}\\right){\\rm\\,d}x.
 ```
 """
 function chebyshevlogmoments2(::Type{T}, N::Int) where T

From 11cb452d6d774faf30d0be2547ebe5360e570674 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 22 Jan 2025 10:14:31 -0600
Subject: [PATCH 216/222] add reference to the docs and readme

---
 README.md         | 10 +++---
 src/GramMatrix.jl | 81 ++++++++++++++++++++++++++++++++++-------------
 2 files changed, 65 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index 0686794d..d2dfe381 100644
--- a/README.md
+++ b/README.md
@@ -162,10 +162,12 @@ julia> @time norm(ipaduatransform(paduatransform(v)) - v)/norm(v)
 
 [1]  D. Ruiz—Antolín and A. Townsend, [A nonuniform fast Fourier transform based on low rank approximation](https://doi.org/10.1137/17M1134822), *SIAM J. Sci. Comput.*, **40**:A529–A547, 2018.
 
-[2] T. S. Gutleb, S. Olver and R. M. Slevinsky, [Polynomial and rational measure modifications of orthogonal polynomials via infinite-dimensional banded matrix factorizations](https://arxiv.org/abs/2302.08448), arXiv:2302.08448, 2023.
+[2] K. Gumerov, S. Rigg, and R. M. Slevinsky, [Fast measure modification of orthogonal polynomials via matrices with displacement structure](https://arxiv.org/abs/2412.17663), arXiv:2412.17663, 2024.
 
-[3] S. Olver, R. M. Slevinsky, and A. Townsend, [Fast algorithms using orthogonal polynomials](https://doi.org/10.1017/S0962492920000045), *Acta Numerica*, **29**:573—699, 2020.
+[3] T. S. Gutleb, S. Olver and R. M. Slevinsky, [Polynomial and rational measure modifications of orthogonal polynomials via infinite-dimensional banded matrix factorizations](https://arxiv.org/abs/2302.08448), arXiv:2302.08448, 2023.
 
-[4]  R. M. Slevinsky, [Fast and backward stable transforms between spherical harmonic expansions and bivariate Fourier series](https://doi.org/10.1016/j.acha.2017.11.001), *Appl. Comput. Harmon. Anal.*, **47**:585—606, 2019.
+[4] S. Olver, R. M. Slevinsky, and A. Townsend, [Fast algorithms using orthogonal polynomials](https://doi.org/10.1017/S0962492920000045), *Acta Numerica*, **29**:573—699, 2020.
 
-[5]  R. M. Slevinsky, [Conquering the pre-computation in two-dimensional harmonic polynomial transforms](https://arxiv.org/abs/1711.07866), arXiv:1711.07866, 2017.
+[5]  R. M. Slevinsky, [Fast and backward stable transforms between spherical harmonic expansions and bivariate Fourier series](https://doi.org/10.1016/j.acha.2017.11.001), *Appl. Comput. Harmon. Anal.*, **47**:585—606, 2019.
+
+[6]  R. M. Slevinsky, [Conquering the pre-computation in two-dimensional harmonic polynomial transforms](https://arxiv.org/abs/1711.07866), arXiv:1711.07866, 2017.
diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index 344c9bb0..64b9fdfd 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -4,6 +4,23 @@ abstract type AbstractGramMatrix{T} <: LayoutMatrix{T} end
 @inline isposdef(G::AbstractGramMatrix) = true
 @inline colsupport(G::AbstractGramMatrix, j) = colrange(G, j)
 
+struct GramMatrix{T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix{T}} <: AbstractGramMatrix{T}
+    W::WT
+    X::XT
+    function GramMatrix{T, WT, XT}(W::WT, X::XT) where {T, WT, XT}
+        if size(W) ≠ size(X)
+            throw(ArgumentError("Cannot construct a GramMatrix with W and X of different sizes."))
+        end
+        if !issymmetric(W)
+            throw(ArgumentError("Cannot construct a GramMatrix with a nonsymmetric W."))
+        end
+        if bandwidths(X) ≠ (1, 1)
+            throw(ArgumentError("Cannot construct a GramMatrix with a nontridiagonal X."))
+        end
+        new{T, WT, XT}(W, X)
+    end
+end
+
 """
     GramMatrix(W::AbstractMatrix, X::AbstractMatrix)
 
@@ -25,24 +42,11 @@ G[:, 1] = 𝐞ₙ, \\quad  G[:, 2] = W[n-1, :]X[n-1, n] - Xᵀ W[:, n].
 Fast (``O(n^2)``) Cholesky factorization of the Gram matrix returns the
 connection coefficients between ``𝐏(x)`` and the polynomials ``𝐐(x)``
 orthogonal in the modified inner product, ``𝐏(x) = 𝐐(x) R``.
-"""
-struct GramMatrix{T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix{T}} <: AbstractGramMatrix{T}
-    W::WT
-    X::XT
-    function GramMatrix{T, WT, XT}(W::WT, X::XT) where {T, WT, XT}
-        if size(W) ≠ size(X)
-            throw(ArgumentError("Cannot construct a GramMatrix with W and X of different sizes."))
-        end
-        if !issymmetric(W)
-            throw(ArgumentError("Cannot construct a GramMatrix with a nonsymmetric W."))
-        end
-        if bandwidths(X) ≠ (1, 1)
-            throw(ArgumentError("Cannot construct a GramMatrix with a nontridiagonal X."))
-        end
-        new{T, WT, XT}(W, X)
-    end
-end
 
+See also [`ChebyshevGramMatrix`](@ref) for a special case.
+
+> K. Gumerov, S. Rigg, and R. M. Slevinsky, [Fast measure modification of orthogonal polynomials via matrices with displacement structure](https://arxiv.org/abs/2412.17663), arXiv:2412.17663, 2024.
+"""
 GramMatrix(W::WT, X::XT) where {T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix{T}} = GramMatrix{T, WT, XT}(W, X)
 
 @inline size(G::GramMatrix) = size(G.W)
@@ -108,6 +112,39 @@ function GramMatrix(μ::PaddedVector{T}, X::XT, p0::T) where {T, XT <: AbstractM
     return GramMatrix(Symmetric(W[1:n, 1:n], :L), eval(XT.name.name)(view(X, 1:n, 1:n)))
 end
 
+"""
+    GramMatrix(cnm1::AbstractVector, cn::AbstractVector, X::AbstractMatrix)
+
+Construct a GramMatrix from its last two columns and the multiplication operator.
+The recurrence is built from ``XᵀW = WX`` and is used in case the moment method is unstable (such as with Laguerre).
+"""
+function GramMatrix(cnm1::AbstractVector{T}, cn::AbstractVector{T}, X::XT) where {T, XT <: AbstractMatrix{T}}
+    N = length(cn)
+    @assert N == length(cnm1) == size(X, 1) == size(X, 2)
+    @assert bandwidths(X) == (1, 1)
+    W = Matrix{T}(undef, N, N)
+    if N > 0
+        @inbounds for m in 1:N
+            W[N, m] = W[m, N] = cn[m]
+        end
+    end
+    if N > 1
+        @inbounds for m in 1:N
+            W[N-1, m] = W[m, N-1] = cnm1[m]
+        end
+    end
+    @inbounds @simd for n in N:-1:3
+        W[1, n-2]  = ((X[1, 1]-X[n-1, n-1])*W[1, n-1] + X[2, 1]*W[2, n-1] - X[n, n-1]*W[1, n])/X[n-2, n-1]
+        for m in 2:n-2
+            W[m, n-2]  = (X[m-1, m]*W[m-1, n-1] + (X[m, m]-X[n-1, n-1])*W[m, n-1] + X[m+1, m]*W[m+1, n-1] - X[n, n-1]*W[m, n])/X[n-2, n-1]
+        end
+        for m in n-1:N-2
+            W[m, n-2] = W[n-2, m]
+        end
+    end
+    return GramMatrix(W, X)
+end
+
 #
 # X'W-W*X = G*J*G'
 # This returns G, where J = [0 1; -1 0], respecting the skew-symmetry of the right-hand side.
@@ -216,6 +253,11 @@ function fastcholesky!(L::BandedMatrix{T}, X, G, c, ĉ, l, v, row1, n) where T
     L[n, n] = sqrt(c[n])
 end
 
+struct ChebyshevGramMatrix{T, V <: AbstractVector{T}} <: AbstractGramMatrix{T}
+    μ::V
+    n::Int
+end
+
 """
     ChebyshevGramMatrix(μ::AbstractVector)
 
@@ -232,11 +274,6 @@ Specialized construction and Cholesky factorization is given for this type.
 
 See also [`GramMatrix`](@ref) for the general case.
 """
-struct ChebyshevGramMatrix{T, V <: AbstractVector{T}} <: AbstractGramMatrix{T}
-    μ::V
-    n::Int
-end
-
 function ChebyshevGramMatrix(μ::V) where {T, V <: AbstractVector{T}}
     n = (length(μ)+1)÷2
     ChebyshevGramMatrix{T, V}(μ, n)

From 91ef2ad0296c96c0d1cc2d107ad66caaee331b76 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 22 Jan 2025 10:15:32 -0600
Subject: [PATCH 217/222] Update GramMatrix.jl

---
 src/GramMatrix.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index 64b9fdfd..a47e0b5f 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -263,7 +263,7 @@ end
 
 Construct a Chebyshev--Gram matrix of size `(length(μ)+1)÷2` with entries:
 ```math
-2 W[i, j] = µ_{|i-j|+1} + µ_{i+j-1}.
+2 W[i, j] = µ[|i-j|+1] + µ[i+j-1].
 ```
 Due to the linearization of a product of two first-kind Chebyshev polynomials,
 the Chebyshev--Gram matrix can be constructed from modified Chebyshev moments:

From 57e56026ba89d5ab91fc0f0c53d36acd6db72b10 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Wed, 22 Jan 2025 10:23:22 -0600
Subject: [PATCH 218/222] polynomial degrees rendered poorly

---
 src/GramMatrix.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index a47e0b5f..b68ba0f0 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -28,7 +28,7 @@ Construct a symmetric positive-definite Gram matrix with data stored in ``W``.
 Given a family of orthogonal polynomials ``𝐏(x) = {p₀(x), p₁(x),…}``
 and a continuous inner product ``⟨f, g⟩``, the Gram matrix is defined by:
 ```math
-W[i, j] = ⟨pᵢ₋₁, pⱼ₋₁⟩.
+W[i, j] = ⟨p_{i-1}, p_{j-1}⟩.
 ```
 Moreover, given ``X``, the transposed Jacobi matrix that satisfies ``x 𝐏(x) = 𝐏(x) X``,
 the Gram matrix satisfies the skew-symmetric rank-2 displacement equation (``X = X[1:n, 1:n]``):
@@ -37,7 +37,7 @@ XᵀW - WX = GJGᵀ,
 ```
 where ``J = [0 1; -1 0]`` and where:
 ```math
-G[:, 1] = 𝐞ₙ, \\quad  G[:, 2] = W[n-1, :]X[n-1, n] - Xᵀ W[:, n].
+G[:, 1] = 𝐞_n, \\quad  G[:, 2] = W[n-1, :]X[n-1, n] - Xᵀ W[:, n].
 ```
 Fast (``O(n^2)``) Cholesky factorization of the Gram matrix returns the
 connection coefficients between ``𝐏(x)`` and the polynomials ``𝐐(x)``

From 2544fcdd1bc51c5d36662dcc4b04eb3a37f7fb10 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 27 Jan 2025 16:20:04 -0600
Subject: [PATCH 219/222] fixup tests, allow X'W-W*X by better colsupport

add some moment-based tests
---
 src/FastTransforms.jl   |  4 +-
 src/GramMatrix.jl       |  4 +-
 test/grammatrixtests.jl | 90 +++++++++++++++++++++++------------------
 3 files changed, 55 insertions(+), 43 deletions(-)

diff --git a/src/FastTransforms.jl b/src/FastTransforms.jl
index 4eef13ad..a9324735 100644
--- a/src/FastTransforms.jl
+++ b/src/FastTransforms.jl
@@ -19,9 +19,9 @@ import AbstractFFTs: Plan, ScaledPlan,
                      fftshift, ifftshift, rfft_output_size, brfft_output_size,
                      normalization
 
-import ArrayLayouts: colsupport, LayoutMatrix, MemoryLayout, AbstractBandedLayout
+import ArrayLayouts: rowsupport, colsupport, LayoutMatrix, MemoryLayout, AbstractBandedLayout
 
-import BandedMatrices: bandwidths
+import BandedMatrices: bandwidths, BandedLayout
 
 import FFTW: dct, dct!, idct, idct!, plan_dct!, plan_idct!,
              plan_dct, plan_idct, fftwNumber
diff --git a/src/GramMatrix.jl b/src/GramMatrix.jl
index b68ba0f0..75f0cc22 100644
--- a/src/GramMatrix.jl
+++ b/src/GramMatrix.jl
@@ -2,7 +2,6 @@ abstract type AbstractGramMatrix{T} <: LayoutMatrix{T} end
 
 @inline issymmetric(G::AbstractGramMatrix) = true
 @inline isposdef(G::AbstractGramMatrix) = true
-@inline colsupport(G::AbstractGramMatrix, j) = colrange(G, j)
 
 struct GramMatrix{T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix{T}} <: AbstractGramMatrix{T}
     W::WT
@@ -53,6 +52,8 @@ GramMatrix(W::WT, X::XT) where {T, WT <: AbstractMatrix{T}, XT <: AbstractMatrix
 @inline getindex(G::GramMatrix, i::Integer, j::Integer) = G.W[i, j]
 @inline bandwidths(G::GramMatrix) = bandwidths(G.W)
 @inline MemoryLayout(G::GramMatrix) = MemoryLayout(G.W)
+@inline rowsupport(G::GramMatrix, j) = rowsupport(MemoryLayout(G), G.W, j)
+@inline colsupport(G::GramMatrix, j) = colsupport(MemoryLayout(G), G.W, j)
 
 """
     GramMatrix(μ::AbstractVector, X::AbstractMatrix)
@@ -282,6 +283,7 @@ end
 @inline size(G::ChebyshevGramMatrix) = (G.n, G.n)
 @inline getindex(G::ChebyshevGramMatrix, i::Integer, j::Integer) = (G.μ[abs(i-j)+1] + G.μ[i+j-1])/2
 @inline bandwidths(G::ChebyshevGramMatrix{T, <: PaddedVector{T}}) where T = (length(G.μ.args[2])-1, length(G.μ.args[2])-1)
+@inline MemoryLayout(G::ChebyshevGramMatrix{T, <: PaddedVector{T}}) where T = BandedLayout()
 
 #
 # 2X'W-W*2X = G*J*G'
diff --git a/test/grammatrixtests.jl b/test/grammatrixtests.jl
index 6cba6b50..a71650f5 100644
--- a/test/grammatrixtests.jl
+++ b/test/grammatrixtests.jl
@@ -5,38 +5,43 @@ using FastTransforms, BandedMatrices, LazyArrays, LinearAlgebra, Test
     for T in (Float32, Float64, BigFloat)
         R = plan_leg2cheb(T, n; normcheb=true)*I
         X = Tridiagonal([T(n)/(2n-1) for n in 1:n-1], zeros(T, n), [T(n)/(2n+1) for n in 1:n-1]) # Legendre X
-        W = Symmetric(R'R)
-        G = GramMatrix(W, X)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ W
+        W = GramMatrix(Symmetric(R'R), X)
+        F = cholesky(W)
+        @test F.L*F.L' ≈ Symmetric(R'R)
         @test F.U ≈ R
 
         R = plan_leg2cheb(T, n; normcheb=true, normleg=true)*I
         X = SymTridiagonal(zeros(T, n), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n-1]) # normalized Legendre X
-        W = Symmetric(R'R)
-        G = GramMatrix(W, X)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ W
+        W = GramMatrix(Symmetric(R'R), X)
+        F = cholesky(W)
+        @test F.L*F.L' ≈ Symmetric(R'R)
         @test F.U ≈ R
 
         b = 4
         X = BandedMatrix(SymTridiagonal(zeros(T, n+b), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n+b-1])) # normalized Legendre X
-        W = I+X^2+X^4
-        W = Symmetric(W[1:n, 1:n])
-        X = BandedMatrix(SymTridiagonal(zeros(T, n), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n-1])) # normalized Legendre X
-        G = GramMatrix(W, X)
-        @test bandwidths(G) == (b, b)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ W
+        M = Symmetric((I+X^2+X^4)[1:n, 1:n])
+        W = GramMatrix(M, X[1:n, 1:n])
+        @test bandwidths(W) == (b, b)
+        F = cholesky(W)
+        @test F.L*F.L' ≈ M
 
         X = BandedMatrix(SymTridiagonal(T[2n-1 for n in 1:n+b], T[-n for n in 1:n+b-1])) # Laguerre X, tests nonzero diagonal
-        W = I+X^2+X^4
-        W = Symmetric(W[1:n, 1:n])
-        X = BandedMatrix(SymTridiagonal(T[2n-1 for n in 1:n], T[-n for n in 1:n-1])) # Laguerre X
-        G = GramMatrix(W, X)
-        @test bandwidths(G) == (b, b)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ W
+        M = Symmetric((I+X^2+X^4)[1:n, 1:n])
+        W = GramMatrix(M, X[1:n, 1:n])
+        @test bandwidths(W) == (b, b)
+        F = cholesky(W)
+        @test F.L*F.L' ≈ M
+
+        for μ in (PaddedVector([T(4)/3;0;-T(4)/15], 2n-1), # w(x) = 1-x^2
+                  PaddedVector([T(26)/15;0;-T(4)/105;0;T(16)/315], 2n-1), # w(x) = 1-x^2+x^4
+                  T(1) ./ (1:2n-1)) # Related to a log weight
+            X = Tridiagonal([T(n)/(2n-1) for n in 1:2n-2], zeros(T, 2n-1), [T(n)/(2n+1) for n in 1:2n-2]) # Legendre X
+            W = GramMatrix(μ, X)
+            X = Tridiagonal(X[1:n, 1:n])
+            G = FastTransforms.compute_skew_generators(W)
+            J = T[0 1; -1 0]
+            @test X'W-W*X ≈ G*J*G'
+        end
     end
     W = reshape([i for i in 1.0:n^2], n, n)
     X = reshape([i for i in 1.0:4n^2], 2n, 2n)
@@ -50,38 +55,43 @@ end
     n = 128
     for T in (Float32, Float64, BigFloat)
         μ = FastTransforms.chebyshevmoments1(T, 2n-1)
-        G = ChebyshevGramMatrix(μ)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ G
+        W = ChebyshevGramMatrix(μ)
+        F = cholesky(W)
+        @test F.L*F.L' ≈ W
         R = plan_cheb2leg(T, n; normleg=true)*I
         @test F.U ≈ R
 
         α, β = (T(0.123), T(0.456))
         μ = FastTransforms.chebyshevjacobimoments1(T, 2n-1, α, β)
-        G = ChebyshevGramMatrix(μ)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ G
+        W = ChebyshevGramMatrix(μ)
+        F = cholesky(W)
+        @test F.L*F.L' ≈ W
         R = plan_cheb2jac(T, n, α, β; normjac=true)*I
         @test F.U ≈ R
 
         μ = FastTransforms.chebyshevlogmoments1(T, 2n-1)
-        G = ChebyshevGramMatrix(μ)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ G
+        W = ChebyshevGramMatrix(μ)
+        F = cholesky(W)
+        @test F.L*F.L' ≈ W
 
         μ = FastTransforms.chebyshevabsmoments1(T, 2n-1)
-        G = ChebyshevGramMatrix(μ)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ G
+        W = ChebyshevGramMatrix(μ)
+        F = cholesky(W)
+        @test F.L*F.L' ≈ W
 
         μ = PaddedVector(T(1) ./ [1,2,3,4,5], 2n-1)
-        G = ChebyshevGramMatrix(μ)
-        @test bandwidths(G) == (4, 4)
-        F = cholesky(G)
-        @test F.L*F.L' ≈ G
+        W = ChebyshevGramMatrix(μ)
+        @test bandwidths(W) == (4, 4)
+        F = cholesky(W)
+        @test F.L*F.L' ≈ W
         μd = Vector{T}(μ)
-        Gd = ChebyshevGramMatrix(μd)
-        Fd = cholesky(Gd)
+        Wd = ChebyshevGramMatrix(μd)
+        Fd = cholesky(Wd)
         @test F.L ≈ Fd.L
+
+        X = Tridiagonal([T(1); ones(T, n-2)/2], zeros(T, n), ones(T, n-1)/2)
+        G = FastTransforms.compute_skew_generators(W)
+        J = T[0 1; -1 0]
+        @test 2*(X'W-W*X) ≈ G*J*G'
     end
 end

From cd46f52517003e5a9389e7809306b0a616781f44 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Mon, 27 Jan 2025 16:22:52 -0600
Subject: [PATCH 220/222] add sym and posdef tests

---
 test/grammatrixtests.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/grammatrixtests.jl b/test/grammatrixtests.jl
index a71650f5..184ff75c 100644
--- a/test/grammatrixtests.jl
+++ b/test/grammatrixtests.jl
@@ -6,6 +6,8 @@ using FastTransforms, BandedMatrices, LazyArrays, LinearAlgebra, Test
         R = plan_leg2cheb(T, n; normcheb=true)*I
         X = Tridiagonal([T(n)/(2n-1) for n in 1:n-1], zeros(T, n), [T(n)/(2n+1) for n in 1:n-1]) # Legendre X
         W = GramMatrix(Symmetric(R'R), X)
+        @test issymmetric(W)
+        @test isposdef(W)
         F = cholesky(W)
         @test F.L*F.L' ≈ Symmetric(R'R)
         @test F.U ≈ R

From 526155c7f9e198f123a8b91291e407efe02952e3 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Tue, 28 Jan 2025 09:29:40 -0600
Subject: [PATCH 221/222] restore tests?

---
 test/grammatrixtests.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/grammatrixtests.jl b/test/grammatrixtests.jl
index 184ff75c..93b62039 100644
--- a/test/grammatrixtests.jl
+++ b/test/grammatrixtests.jl
@@ -22,14 +22,16 @@ using FastTransforms, BandedMatrices, LazyArrays, LinearAlgebra, Test
         b = 4
         X = BandedMatrix(SymTridiagonal(zeros(T, n+b), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n+b-1])) # normalized Legendre X
         M = Symmetric((I+X^2+X^4)[1:n, 1:n])
-        W = GramMatrix(M, X[1:n, 1:n])
+        X = BandedMatrix(SymTridiagonal(zeros(T, n), [sqrt(T(n)^2/(4*n^2-1)) for n in 1:n-1])) # normalized Legendre X
+        W = GramMatrix(M, X)
         @test bandwidths(W) == (b, b)
         F = cholesky(W)
         @test F.L*F.L' ≈ M
 
         X = BandedMatrix(SymTridiagonal(T[2n-1 for n in 1:n+b], T[-n for n in 1:n+b-1])) # Laguerre X, tests nonzero diagonal
         M = Symmetric((I+X^2+X^4)[1:n, 1:n])
-        W = GramMatrix(M, X[1:n, 1:n])
+        X = BandedMatrix(SymTridiagonal(T[2n-1 for n in 1:n], T[-n for n in 1:n-1])) # Laguerre X, tests nonzero diagonal
+        W = GramMatrix(M, X)
         @test bandwidths(W) == (b, b)
         F = cholesky(W)
         @test F.L*F.L' ≈ M

From 2446237a3f27028a1b0c9e527a1a22a55d4192f2 Mon Sep 17 00:00:00 2001
From: MikaelSlevinsky <richard.slevinsky@umanitoba.ca>
Date: Fri, 31 Jan 2025 12:58:26 -0600
Subject: [PATCH 222/222] remove docs spacing

---
 docs/src/index.md | 82 ++---------------------------------------------
 1 file changed, 2 insertions(+), 80 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index 06d70e5d..3c2c2844 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -10,13 +10,10 @@ This package provides a Julia wrapper for the [C library](https://github.com/Mik
 
 For this documentation, please see the documentation for [FastTransforms](https://github.com/MikaelSlevinsky/FastTransforms). Most transforms have separate forward and inverse plans. In some instances, however, the inverse is in the sense of least-squares, and therefore only the forward transform is planned.
 
-### Fast Cholesky factorization of the Gram matrix
+### Modified orthogonal polynomials via fast Cholesky factorization of the Gram matrix
 
 ```@docs
 GramMatrix
-```
-
-```@docs
 ChebyshevGramMatrix
 ```
 
@@ -24,29 +21,11 @@ ChebyshevGramMatrix
 
 ```@docs
 nufft1
-```
-
-```@docs
 nufft2
-```
-
-```@docs
 nufft3
-```
-
-```@docs
 inufft1
-```
-
-```@docs
 inufft2
-```
-
-```@docs
 paduatransform
-```
-
-```@docs
 ipaduatransform
 ```
 
@@ -54,13 +33,7 @@ ipaduatransform
 
 ```@docs
 gaunt
-```
-
-```@docs
 paduapoints
-```
-
-```@docs
 sphevaluate
 ```
 
@@ -70,29 +43,11 @@ sphevaluate
 
 ```@docs
 FastTransforms.half
-```
-
-```@docs
 FastTransforms.two
-```
-
-```@docs
 FastTransforms.δ
-```
-
-```@docs
 FastTransforms.Λ
-```
-
-```@docs
 FastTransforms.lambertw
-```
-
-```@docs
 FastTransforms.pochhammer
-```
-
-```@docs
 FastTransforms.stirlingseries
 ```
 
@@ -100,53 +55,20 @@ FastTransforms.stirlingseries
 
 ```@docs
 FastTransforms.clenshawcurtisnodes
-```
-
-```@docs
 FastTransforms.clenshawcurtisweights
-```
-
-```@docs
 FastTransforms.fejernodes1
-```
-
-```@docs
 FastTransforms.fejerweights1
-```
-
-```@docs
 FastTransforms.fejernodes2
-```
-
-```@docs
 FastTransforms.fejerweights2
-```
-
-```@docs
 FastTransforms.chebyshevmoments1
-```
-
-```@docs
 FastTransforms.chebyshevjacobimoments1
-```
-
-```@docs
 FastTransforms.chebyshevlogmoments1
-```
-
-```@docs
 FastTransforms.chebyshevmoments2
-```
-
-```@docs
 FastTransforms.chebyshevjacobimoments2
-```
-
-```@docs
 FastTransforms.chebyshevlogmoments2
 ```
 
-### Elliptic
+### Elliptic Submodule
 
 ```@docs
 FastTransforms.Elliptic