Skip to content

Commit

Permalink
fixed mapper algo + plots w/ layouts
Browse files Browse the repository at this point in the history
  • Loading branch information
wildart committed Mar 25, 2019
1 parent e8804ea commit 108d027
Show file tree
Hide file tree
Showing 6 changed files with 311 additions and 149 deletions.
6 changes: 5 additions & 1 deletion src/TDA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ module TDA

using ComputationalHomology
using RecipesBase
import Random

include("utils.jl")
include("pdiag.jl")
include("nerve.jl")
include("graph.jl")
include("clustering.jl")
include("mapper.jl")
include("layouts.jl")

end
82 changes: 82 additions & 0 deletions src/clustering.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import Clustering
import Distances
import LinearAlgebra: dot

"""
noselection(algo, data)
Perform no clustering selection.
"""
noselection(algo::Function, data::AbstractMatrix{<:Real}) = Clustering.assignments(algo(data))

"""
Knee point detection
"""
function knee(x)
n = length(x)
pts = hcat(1:n, x)'
# calculate line vector
lvec = pts[:,end] - pts[:,1]
lvec ./= sqrt(sum(lvec.^2))
# find a vector parallel to the line vector
vecfst = pts .- pts[:,1]
vecfstpar = lvec * mapslices(v->dot(v, lvec), vecfst, dims=1)
# calculate a length of a projection to the line vector
ldist = vec(sqrt.(sum((vecfst - vecfstpar).^2, dims=1)))
# get maximal projection
return findmax(ldist)
end

function getmaxk(N::Int; kwargs...)
N <= 5 && return 2

# setup parameters
Kmax = 10 # default
for (p,v) in kwargs
if p == :maxk
Kmax = v
end
end
return min(floor(Int, N/2), Kmax)
end

"""
elbow(algo, data)
Perform automatic selection of the best clustering of `data` by `algo` using elbow method.
Clustering algorithm `algo` must be a function with one argument which determines number of clusters.
"""
function elbow(algo::Function, data::AbstractMatrix{<:Real}; kwargs...)
N = size(data,2)
Kmax = getmaxk(N; kwargs...)

S = [let C = algo(data, k)
# println("$k => ", C.centers)
# println("$k => ", C.costs)
C.totalcost => Clustering.assignments(C)
end
for k in 2:Kmax]
pushfirst!(S, sum(mapslices(p->sum(p.^2), data .- mean(data, dims=2), dims=1))=>fill(1,N))

# return a clustering corespondig to an elbow point
return S[knee(map(first, S)) |> last] |> last
end

"""
silhouette(algo, data)
Perform automatic selection of the best clustering of `data` by `algo` using silhouette method.
Clustering algorithm `algo` must be a function with one argument which determines number of clusters.
"""
function silhouette(algo::Function, data::AbstractMatrix{<:Real}; kwargs...)
N = size(data,2)
Kmax = getmaxk(N; kwargs...)
S = [let C = algo(data, k),
D = Distances.pairwise(Distances.Euclidean(), data)
mean(Clustering.silhouettes(C, D)) => Clustering.assignments(C)
end
for k in 2:Kmax]

# return a clustering corespondig to a maximum average silhouette
return S[findmax(map(first, S)) |> last] |> last
end
109 changes: 109 additions & 0 deletions src/layouts.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""
spring_layout(cplx[; C=2.0, maxiter=100, seed=0])
Calculate a 1D simplicial complex layout using spring/repulsion model.
This borrows with modifications from [GraphPlot](https://github.com/JuliaGraphs/GraphPlot.jl)
# Arguments
- `cplx::SimplicialComplex`: the simplicial complex.
- `C::Real`: the constant to fiddle with density of resulting layout.
- `maxiter::Integer`: the number of iterations we apply the forces.
- `starttemp::Real`: the initial "temperature", controls movement per iteration.
- `seed::Integer`: the seed for pseudorandom generation of locations (default = 0).
"""
function spring_layout(cplx::SimplicialComplex, locs_x, locs_y; C=2.0, maxiter=100, starttemp=2.0) where {T<:Integer}

N = size(cplx, 0)
adj_matrix = adjacency_matrix(cplx)

# The optimal distance bewteen vertices
K = C * sqrt(4.0 / N)

# Store forces and apply at end of iteration all at once
force_x = zeros(N)
force_y = zeros(N)

@inbounds for iter = 1:maxiter
# Calculate forces
for i = 1:N
force_vec_x = 0.0
force_vec_y = 0.0
for j = 1:N
i == j && continue
d_x = locs_x[j] - locs_x[i]
d_y = locs_y[j] - locs_y[i]
d = sqrt(d_x^2 + d_y^2)
if adj_matrix[i,j] != zero(eltype(adj_matrix)) || adj_matrix[j,i] != zero(eltype(adj_matrix))
# F = d^2 / K - K^2 / d
F_d = d / K - K^2 / d^2
else
# Just repulsive
# F = -K^2 / d^
F_d = -K^2 / d^2
end
# d / sin θ = d_y/d = fy/F
# F /| dy fy -> fy = F*d_y/d
# / | cos θ = d_x/d = fx/F
# /--- -> fx = F*d_x/d
# dx fx
force_vec_x += F_d*d_x
force_vec_y += F_d*d_y
end
force_x[i] = force_vec_x
force_y[i] = force_vec_y
end
# Cool down
TEMP = starttemp / iter
# Now apply them, but limit to temperature
for i = 1:N
force_mag = sqrt(force_x[i]^2 + force_y[i]^2)
scale = min(force_mag, TEMP)/force_mag
locs_x[i] += force_x[i] * scale
#locs_x[i] = max(-1.0, min(locs_x[i], +1.0))
locs_y[i] += force_y[i] * scale
#locs_y[i] = max(-1.0, min(locs_y[i], +1.0))
end
end

# Scale to unit square
min_x, max_x = minimum(locs_x), maximum(locs_x)
min_y, max_y = minimum(locs_y), maximum(locs_y)
function scaler(z, a, b)
2.0*((z - a)/(b - a)) - 1.0
end
map!(z -> scaler(z, min_x, max_x), locs_x, locs_x)
map!(z -> scaler(z, min_y, max_y), locs_y, locs_y)

return locs_x, locs_y
end

import Random
function spring_layout(cplx::SimplicialComplex; seed::Integer=0, xs=nothing, ys=nothing, kwargs...)
N = size(cplx, 0)
rng = Random.MersenneTwister(seed)
spring_layout(cplx, xs === nothing ? 2 .* rand(rng, N) .- 1.0 : xs,
ys === nothing ? 2 .* rand(rng, N) .- 1.0 : ys ; kwargs...)
end

function spring_layout(mpr::Mapper; seed::Integer=0, random::Bool=false, kwargs...)
xs = random ? nothing : mpr.centers[1,:]
ys = random ? nothing : mpr.centers[2,:]
spring_layout(mpr.complex; xs=xs, ys=ys, kwargs...)
end

"""
circular_layout(cplx)
Arranges verticies of a 1D simplicial complex on a circle.
"""
circular_layout(cplx::SimplicialComplex) = circlepoints(size(cplx, 0)+1, 1.0)
circular_layout(mpr::Mapper) = circular_layout(mpr.complex)

function constant_layout(data::AbstractMatrix)
inner_layout(cplx::SimplicialComplex) = (data[1,:], data[2,:])
return inner_layout
end

constant_layout(mpr::Mapper) = (mpr.centers[1,:], mpr.centers[2,:])
98 changes: 67 additions & 31 deletions src/mapper.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,41 +7,58 @@ struct Mapper
complex::SimplicialComplex
filter::Vector{<:Real}
patches::Vector{Vector{Int}}
centers::Matrix{<:Real}
end

Base.show(io::IO, mpr::Mapper) = show(io, "Mapper[$(length(mpr.patches))]")

"""
centers(mpr, X)
Return centers of cover patches for a Mapper complex `mpr` calculated from dataset `X`.
"""
function centers(mpr::Mapper, X::AbstractMatrix{<:Real})
return [mean(view(X, : ,p), dims=2) for p in mpr.patches]
end

"""
mapper(X::AbstractMatrix{<:Real}[; filter = eccentricity])
Calculate a simplicial complex of `X` dataset using Mapper algorithm.
"""
function mapper(X::AbstractMatrix{<:Real};
filter::Function = eccentricity,
cover::Function = balanced,
clustering::Function = Clustering.kmeans,
cluster_selection::Function = silhouette)
function mapper(X::AbstractMatrix{<:Real}; kwargs...)

# setup parameters
filterfn = eccentricity
coverfn = balanced
clusterfn = Clustering.kmeans
clusterselectionfn = silhouette
for (p,v) in kwargs
if p == :filter
@assert isa(v, Function) "`$p` parameter must be function"
filterfn = v
elseif p == :cover
@assert isa(v, Function) "`$p` parameter must be function"
coverfn = v
elseif p == :clustering
@assert isa(v, Function) "`$p` parameter must be function"
clusterfn = v
elseif p == :clustselection
@assert isa(v, Function) "`$p` parameter must be function"
clusterselectionfn = v
elseif p == :seed
Random.seed!(v)
end
end

# calculate filter range
flt = filter(X)
flt = filterfn(X)

# construct cover of the filter range
covering = cover(flt)
covering = coverfn(flt; kwargs...)

# using clustering algorithm create patches from cover elements
patches = Vector{Int}[]
for c in covering
lbls = cluster_selection(clustering, view(X, :, c))
for i in 1:length(unique(lbls))
println(c)
if length(c) == 1
push!(patches, c)
end
lbls = clusterselectionfn(clusterfn, view(X, :, c); kwargs...)
plen = length(unique(lbls))
println(plen)
for i in 1:plen
push!(patches, c[findall(isequal(i), lbls)])
end
end
Expand All @@ -50,6 +67,7 @@ function mapper(X::AbstractMatrix{<:Real};
P = length(patches)
cplx = SimplicialComplex([Simplex(i) for i in 1:P]...)
for i in 1:P
println("$i => ", patches[i])
for j in i+1:P
overlap = intersect(patches[i], patches[j])
if length(overlap) > 0
Expand All @@ -58,12 +76,16 @@ function mapper(X::AbstractMatrix{<:Real};
end
end

return Mapper(cplx, flt, patches)
# calculate centers of cover patches
cntrs = hcat((mean(view(X, : ,p), dims=2) for p in patches)...)

return Mapper(cplx, flt, patches, cntrs)
end

@recipe function f(mpr::Mapper; complex_layout=circular_layout)
@recipe function f(mpr::Mapper; complex_layout = circular_layout,
minvsize = 15, maxvsize = 35)

xpos, ypos = complex_layout(mpr.complex)
xpos, ypos = complex_layout(mpr)

# set image limits
xlims --> extrema(xpos) .* 1.2
Expand All @@ -81,16 +103,30 @@ end
end
end

# show nodes
# calculate vertex attribues
n = length(mpr.patches)
xcrd = zeros(n)
ycrd = zeros(n)
zcol = zeros(n)
msize = fill(1,n)
for (i, p) in enumerate(mpr.patches)
mcolor = mean(mpr.filter[p])
msize = length(p)
@series begin
seriestype := :scatter
markersize := msize
label --> "$msize"
zcolor := mcolor
[xpos[i]], [ypos[i]]
end
zcol[i] = mean(mpr.filter[p])
msize[i] = length(p)
xcrd[i] = xpos[i]
ycrd[i] = ypos[i]
end
manno = map(string, msize)
smin, smax = extrema(msize)
srng = smax-smin
msize = (maxvsize-minvsize).*(msize .- smin)./srng .+ minvsize

# show nodes
@series begin
seriestype := :scatter
markersize := msize
label --> ""
zcolor := zcol
series_annotations := manno
xcrd, ycrd
end
end
Loading

0 comments on commit 108d027

Please sign in to comment.