Update clustering.py

- Improved documentation - Added stricter parameter validation - Added predict_fuzzy method - Better handling of predict, predict_fuzzy, and predict_proba when called before fitting
AndreaCampagner · Jan 28, 2025 · 4c02689 · 4c02689
1 parent f6fb535
commit 4c02689
Showing 1 changed file with 58 additions and 9 deletions.
diff --git a/softpy/fuzzy/clustering.py b/softpy/fuzzy/clustering.py
@@ -3,14 +3,46 @@
 from sklearn.utils import resample
 from sklearn.base import BaseEstimator, ClusterMixin
 from .fuzzyset import DiscreteFuzzySet
+from .fuzzy_partition import FuzzyPartition
+from typing import Callable
 
 class FuzzyCMeans(BaseEstimator,ClusterMixin):
   '''
-  Implements the fuzzy c-means algorithm. The interface is compatible with the scikit-learn library. It allows to set the number of clusters,
-  a tolerance degree (for avoiding errors in numerical operations), the number of iterations before termination, the clustering metric as well
-  as the fuzzifier degree.
+  Implements the fuzzy c-means algorithm. The interface is compatible with the scikit-learn library.
+  
+  Parameters
+  ----------
+  :param n_clusters: The number of centroids used to define the number of clusters
+  :type n_clusters: int, default=2
+  
+  :param epsilon: Error tolerance parameter used to avoid division by 0
+  :type epsilon: float, default=0.001
+
+  :param iters: The number of iterations for the optimization routine
+  :type iters: int, default=100
+
+  :param random_state: Random seed parameter for repeatability. None corresponds to randomized execution.
+  :type random_state: int|RandomState|None, default=None
+
+  :param metric: metric for the computation of distances in the fuzzy c-means algorithm
+  :type metric: str|Callable, default=euclidean
+
+  :param fuzzifier: parameter that controls the hardness of the clustering result. Values closer to 1 will enforce a clustering result closer to that obtained with standard k-means
+  :type fuzzifier: np.number (should be larger than 1), default=2
   '''
-  def __init__(self, n_clusters=2, epsilon=0.001, iters=100, random_state=None, metric='euclidean', fuzzifier=2):
+  def __init__(self, n_clusters: int=2, epsilon: float=0.001, iters: int=100, random_state:int|np.random.RandomState|None=None, metric: str|Callable ='euclidean', fuzzifier: np.number=2):
+    if n_clusters <= 1:
+      raise ValueError("n_clusters must be an int larger than 1, was %d" % n_clusters)
+
+    if fuzzifier <= 1:
+      raise ValueError("fuzzifier must be a number larger than 1, was %.2f" % fuzzifier)
+
+    if epsilon <= 0:
+      raise ValueError("epsilon must be a number larger than 0, was %.2f" % epsilon)
+
+    if iters < 1:
+      raise ValueError("iters must be an int larger than 0, was %d" % iters)
+
     self.n_clusters = n_clusters
     self.epsilon = epsilon
     self.iters = iters
@@ -39,7 +71,9 @@ def predict(self, X):
     ''' 
     For each given instance returns the cluster with maximum membership degree. The fit method must have been called before executing this method.
     '''
-    if not self.fitted:
+    try:
+      self.fitted
+    except:
       raise RuntimeError("Estimator must be fitted")
     dists = pairwise_distances(X, self.centroids, metric=self.metric)
     self.cluster_assignments = (dists+self.epsilon)**(2/(1-self.fuzzifier))/(np.sum((dists+self.epsilon)**(2/(1-self.fuzzifier)), axis=1)[:,np.newaxis])
@@ -50,14 +84,29 @@ def predict_proba(self, X):
     ''' 
     For each given instance returns the membership degrees to the computed clusters. The fit method must have been called before executing this method.
     '''
-    if not self.fitted:
+    try:
+      self.fitted
+    except:
+      raise RuntimeError("Estimator must be fitted")
+    dists = pairwise_distances(X, self.centroids, metric=self.metric)
+    self.cluster_assignments = (dists+self.epsilon)**(2/(1-self.fuzzifier))/(np.sum((dists+self.epsilon)**(2/(1-self.fuzzifier)), axis=1)[:,np.newaxis])
+    return self.cluster_assignments
+
+  def predict_fuzzy(self, X, name="fcm"):
+    ''' 
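+    Returns a FuzzyPartition (labelled with the given name) containing one DiscreteFuzzySet per computed cluster; each fuzzy set is defined over the indices of the given instances, with their membership degrees to that cluster. The fit method must have been called before executing this method.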
+    '''
+    try:
+      self.fitted
+    except:
       raise RuntimeError("Estimator must be fitted")
     dists = pairwise_distances(X, self.centroids, metric=self.metric)
     self.cluster_assignments = (dists+self.epsilon)**(2/(1-self.fuzzifier))/(np.sum((dists+self.epsilon)**(2/(1-self.fuzzifier)), axis=1)[:,np.newaxis])
-    fuzzy_sets = []
+    fuzzy_sets = {}
     for cl in range(self.cluster_assignments.shape[1]):
-      fuzzy_sets.append(DiscreteFuzzySet(list(range(X.shape[0])), self.cluster_assignments[:,cl]))
-    return np.array(fuzzy_sets)
+      fuzzy_sets[str(cl)] = DiscreteFuzzySet(list(range(X.shape[0])), self.cluster_assignments[:,cl])
+    fp = FuzzyPartition(name, fuzzy_sets)
+    return fp
 
   def fit_predict(self, X, y=None):
     self.fit(X,y)