1
+ using Pkg
1
2
# Set the path below to the Python interpreter that PyCall should be built against
2
3
ENV [" PYTHON" ] = " /home/mysterio/miniconda3/envs/pydata/bin/python"
3
4
Pkg. build (" PyCall" ) # Build PyCall to suit the specified Python env
4
5
5
6
using PyCall
6
7
using Plots
7
8
using LinearAlgebra
9
+ using Statistics
8
10
using BenchmarkTools
9
11
using Distances
10
12
@@ -26,12 +28,12 @@ ran_c = randn(ran_k, ran_k)
26
28
"""
27
29
function sum_of_squares (x:: Array{Float64,2} , labels:: Array{Int64,1} , centre:: Array , k:: Int )
28
30
ss = 0
29
-
31
+
30
32
for j = 1 : k
31
33
group_data = x[findall (labels .== j), :]
32
34
group_centroid_matrix = convert (Matrix, centre[j, :]' )
33
35
group_distance = pairwise (Euclidean (), group_data, group_centroid_matrix, dims= 1 )
34
-
36
+
35
37
ss += sum (group_distance .^ 2 )
36
38
end
37
39
@@ -56,16 +58,16 @@ function Kmeans(design_matrix::Array{Float64, 2}, k::Int64; max_iters::Int64=300
56
58
distances = zeros (n_row)
57
59
58
60
J_previous = Inf64
59
-
61
+
60
62
# Update centroids & labels with closest members until convergence
61
63
for iter = 1 : max_iters
62
64
nearest_neighbour = pairwise (Euclidean (), design_matrix, centroids, dims= 1 )
63
-
65
+
64
66
min_val_idx = findmin .(eachrow (nearest_neighbour))
65
67
66
68
distances = [x[1 ] for x in min_val_idx]
67
69
labels = [x[2 ] for x in min_val_idx]
68
-
70
+
69
71
centroids = [ mean ( X[findall (labels .== j), : ], dims = 1 ) for j = 1 : k]
70
72
centroids = reduce (vcat, centroids)
71
73
@@ -75,7 +77,7 @@ function Kmeans(design_matrix::Array{Float64, 2}, k::Int64; max_iters::Int64=300
75
77
# Show progress and terminate if J stopped decreasing.
76
78
println (" Iteration " , iter, " : Jclust = " , J, " ." )
77
79
end ;
78
-
80
+
79
81
# Final Step 5: Check for convergence
80
82
if iter > 1 && abs (J - J_previous) < (tol * J)
81
83
# TODO : Calculate the sum of squares
@@ -84,9 +86,9 @@ function Kmeans(design_matrix::Array{Float64, 2}, k::Int64; max_iters::Int64=300
84
86
if verbose
85
87
println (" Successfully terminated with convergence." )
86
88
end
87
-
89
+
88
90
return labels, centroids, sum_squares
89
-
91
+
90
92
elseif iter == max_iters && abs (J - J_previous) > (tol * J)
91
93
throw (error (" Failed to converge Check data and/or implementation or increase max_iter." ))
92
94
end ;
100
102
Kmeans(X, 3)

"""
    elbow_sum_of_squares(data, ks)

Run `Kmeans(data, k, verbose=false)` once for every cluster count `k` in `ks` and
return the third element of each result (the sum of squares), in the order of `ks`.
"""
elbow_sum_of_squares(data, ks) = [Kmeans(data, k, verbose=false)[3] for k in ks]

# Cluster counts to sweep over.
num = collect(2:10)

# Time the whole sweep. `@btime` evaluates its expression inside a generated function,
# so variables assigned there are not visible afterwards; the timing is therefore kept
# separate from the run whose results are plotted. `$` interpolation keeps access to
# the non-constant globals out of the measurement.
@btime elbow_sum_of_squares($X, $num)

# Collect the values to plot in an ordinary top-level run so `ss` is actually defined.
ss = elbow_sum_of_squares(X, num)

plot(num, ss, ylabel="Sum of Squares", xlabel="Number of Iterations",
     title="Test For Heterogeneity Per Iteration", legend=false)
118
+
0 commit comments