Skip to content

Commit

Permalink
Update clustering.py
Browse files Browse the repository at this point in the history
- Improved documentation
- Added better controls
- Added predict_fuzzy method
- Better management of predict, predict_fuzzy, predict_proba without first fitting
  • Loading branch information
AndreaCampagner committed Jan 28, 2025
1 parent f6fb535 commit 4c02689
Showing 1 changed file with 58 additions and 9 deletions.
67 changes: 58 additions & 9 deletions softpy/fuzzy/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,46 @@
from sklearn.utils import resample
from sklearn.base import BaseEstimator, ClusterMixin
from .fuzzyset import DiscreteFuzzySet
from .fuzzy_partition import FuzzyPartition
from typing import Callable

class FuzzyCMeans(BaseEstimator,ClusterMixin):
'''
Implements the fuzzy c-means algorithm. The interface is compatible with the scikit-learn library. It allows to set the number of clusters,
a tolerance degree (for avoiding errors in numerical operations), the number of iterations before termination, the clustering metric as well
as the fuzzifier degree.
Implements the fuzzy c-means algorithm. The interface is compatible with the scikit-learn library.
Parameters
----------
:param n_clusters: The number of centroids used to define the number of clusters
:type n_clusters: int, default=2
:param epsilon: Error tolerance parameter used to avoid division by 0
:type epsilon: float, default=0.001
:param iters: The number of iterations for the optimization routine
:type iters: int, default=100
:param random_state: Random seed parameter for repeatability. None corresponds to randomized execution.
:type random_state: int|RandomState|None, default=None
:param metric: metric for the computation of distances in the fuzzy c-means algorithm
:type metric: str|Callable, default=euclidean
:param fuzzifier: parameter that controls the hardness of the clustering result. Values closer to 1 will enforce a clustering result closer to that obtained with standard k-means
:type fuzzifier: np.number (should be larger than 1), default=2
'''
def __init__(self, n_clusters=2, epsilon=0.001, iters=100, random_state=None, metric='euclidean', fuzzifier=2):
def __init__(self, n_clusters: int=2, epsilon: float=0.001, iters: int=100, random_state:int|np.random.RandomState|None=None, metric: str|Callable ='euclidean', fuzzifier: np.number=2):
if n_clusters <= 1:
raise ValueError("n_clusters must be an int larger than 1, was %d" % n_clusters)

if fuzzifier <= 1:
raise ValueError("fuzzifier must be a number larger than 1, was %.2f" % fuzzifier)

if epsilon <= 0:
raise ValueError("epsilon must be a number larger than 0, was %.2f" % epsilon)

if iters < 1:
raise ValueError("iters must be an int larger than 0, was %d" % iters)

self.n_clusters = n_clusters
self.epsilon = epsilon
self.iters = iters
Expand Down Expand Up @@ -39,7 +71,9 @@ def predict(self, X):
'''
For each given instance returns the cluster with maximum membership degree. The fit method must have been called before executing this method.
'''
if not self.fitted:
try:
self.fitted
except:
raise RuntimeError("Estimator must be fitted")
dists = pairwise_distances(X, self.centroids, metric=self.metric)
self.cluster_assignments = (dists+self.epsilon)**(2/(1-self.fuzzifier))/(np.sum((dists+self.epsilon)**(2/(1-self.fuzzifier)), axis=1)[:,np.newaxis])
Expand All @@ -50,14 +84,29 @@ def predict_proba(self, X):
'''
For each given instance returns the membership degrees to the computed clusters. The fit method must have been called before executing this method.
'''
if not self.fitted:
try:
self.fitted
except:
raise RuntimeError("Estimator must be fitted")
dists = pairwise_distances(X, self.centroids, metric=self.metric)
self.cluster_assignments = (dists+self.epsilon)**(2/(1-self.fuzzifier))/(np.sum((dists+self.epsilon)**(2/(1-self.fuzzifier)), axis=1)[:,np.newaxis])
return self.cluster_assignments

def predict_fuzzy(self, X, name="fcm"):
'''
Computes the membership degrees of the given instances to the computed clusters and returns them as discrete fuzzy sets, one per cluster, each defined over the instance indices. The fit method must have been called before executing this method.
'''
try:
self.fitted
except:
raise RuntimeError("Estimator must be fitted")
dists = pairwise_distances(X, self.centroids, metric=self.metric)
self.cluster_assignments = (dists+self.epsilon)**(2/(1-self.fuzzifier))/(np.sum((dists+self.epsilon)**(2/(1-self.fuzzifier)), axis=1)[:,np.newaxis])
fuzzy_sets = []
fuzzy_sets = {}
for cl in range(self.cluster_assignments.shape[1]):
fuzzy_sets.append(DiscreteFuzzySet(list(range(X.shape[0])), self.cluster_assignments[:,cl]))
return np.array(fuzzy_sets)
fuzzy_sets[str(cl)] = DiscreteFuzzySet(list(range(X.shape[0])), self.cluster_assignments[:,cl])
fp = FuzzyPartition(name, fuzzy_sets)
return fp

def fit_predict(self, X, y=None):
self.fit(X,y)
Expand Down

0 comments on commit 4c02689

Please sign in to comment.