Add the config files for the setting of random state.

zsunAI · May 27, 2020 · aeada6c · aeada6c
1 parent 0fb166e
commit aeada6c
Show file tree

Hide file tree

Showing 4 changed files with 164 additions and 65 deletions.
diff --git a/FAE/FeatureAnalysis/Classifier.py b/FAE/FeatureAnalysis/Classifier.py
@@ -1,10 +1,14 @@
+"""
+All rights reserved.
+-- Yang Song.
+"""
 import os
 import pickle
 from copy import deepcopy
 
 import pandas as pd
 import numpy as np
-from abc import ABCMeta,abstractmethod
+from abc import ABCMeta, abstractmethod
 from sklearn.svm import SVC
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
 from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
@@ -16,19 +20,22 @@
 
 from FAE.DataContainer.DataContainer import DataContainer
 from Utility.EcLog import eclog
+from Utility.Constants import *
+from FAE.HyperParameterConfig.HyperParamManager import RANDOM_SEED
+
 
 class Classifier:
-    '''
+    """
     This is the base class of the classifer. All the specific classifier need to be artributed from this base class.
-    '''
+    """
     def __init__(self):
         self.__model = None
         self._x = np.array([])
         self._y = np.array([])
         self._data_container = DataContainer()
         self.logger = eclog(os.path.split(__file__)[-1]).GetLogger()
 
-    def __deepcopy__(self, memodict={}):
+    def __deepcopy__(self, memodict=None):  # copy.deepcopy() passes its memo dict positionally; the parameter must stay
         copy_classifier = type(self)()
         copy_classifier._data_container = deepcopy(self._data_container)
         copy_classifier._x, copy_classifier._y = deepcopy(self._x), deepcopy(self._y)
@@ -107,21 +114,22 @@ def Load(self, store_path):
     def GetName(self):
         pass
 
+
 class SVM(Classifier):
     def __init__(self, **kwargs):
         super(SVM, self).__init__()
-        if not 'kernel' in kwargs.keys():
+        if 'kernel' not in kwargs.keys():
             kwargs['kernel'] = 'linear'
-        if not 'C' in kwargs.keys():
+        if 'C' not in kwargs.keys():
             kwargs['C'] = 1.0
-        if not 'probability' in kwargs.keys():
+        if 'probability' not in kwargs.keys():
             kwargs['probability'] = True
-        super(SVM, self).SetModel(SVC(random_state=42, **kwargs))
+        super(SVM, self).SetModel(SVC(random_state=RANDOM_SEED[CLASSIFIER_SVM], **kwargs))
 
-        self.__name = 'SVM_'+ kwargs['kernel'] + '_C_' + '{:.3f}'.format(kwargs['C'])
+        self.__name = 'SVM_' + kwargs['kernel'] + '_C_' + '{:.3f}'.format(kwargs['C'])
 
     def GetName(self):
-        return 'SVM'
+        return CLASSIFIER_SVM
 
     def Predict(self, x, is_probability=True):
         if is_probability:
@@ -130,10 +138,10 @@ def Predict(self, x, is_probability=True):
             return super(SVM, self).Predict(x)
 
     def GetDescription(self):
-        text = "We used support vector machine (SVM) as the classifier. SVM was an effective and robust classifier to " \
-               "build the model. The kernel function has the ability to map the features into a higher dimension to search " \
-               "the hyper-plane for separating the cases with different labels. Here we used the linear kernel function because " \
-               "it was easier to explain the coefficients of the features for the final model. "
+        text = "We used support vector machine (SVM) as the classifier. SVM was an effective and robust classifier " \
+               "to build the model. The kernel function has the ability to map the features into a higher dimension " \
+               "to search the hyper-plane for separating the cases with different labels. Here we used the linear " \
+               "kernel function because it was easier to explain the coefficients of the features for the final model. "
         return text
 
     def Save(self, store_folder):
@@ -144,17 +152,19 @@ def Save(self, store_folder):
         # Save the coefficients
         try:
             coef_path = os.path.join(store_folder, 'SVM_coef.csv')
-            df = pd.DataFrame(data=np.transpose(self.GetModel().coef_), index=self._data_container.GetFeatureName(), columns=['Coef'])
+            df = pd.DataFrame(data=np.transpose(self.GetModel().coef_),
+                              index=self._data_container.GetFeatureName(), columns=['Coef'])
             df.to_csv(coef_path)
         except Exception as e:
             content = 'SVM with specific kernel does not give coef: '
             self.logger.error('{}{}'.format(content, str(e)))
             print('{} \n{}'.format(content, e.__str__()))
 
-        #Save the intercept_
+        # Save the intercept_
         try:
             intercept_path = os.path.join(store_folder, 'SVM_intercept.csv')
-            intercept_df = pd.DataFrame(data=(self.GetModel().intercept_).reshape(1, 1), index=['intercept'], columns=['value'])
+            intercept_df = pd.DataFrame(data=self.GetModel().intercept_.reshape(1, 1),
+                                        index=['intercept'], columns=['value'])
             intercept_df.to_csv(intercept_path)
         except Exception as e:
             content = 'SVM with specific kernel does not give intercept: '
@@ -163,6 +173,7 @@ def Save(self, store_folder):
 
         super(SVM, self).Save(store_folder)
 
+
 class LDA(Classifier):
     def __init__(self, **kwargs):
         super(LDA, self).__init__()
@@ -190,7 +201,8 @@ def Save(self, store_path):
         # Save the coefficients
         try:
             coef_path = os.path.join(store_path, 'LDA_coef.csv')
-            df = pd.DataFrame(data=np.transpose(self.GetModel().coef_), index=self._data_container.GetFeatureName(), columns=['Coef'])
+            df = pd.DataFrame(data=np.transpose(self.GetModel().coef_),
+                              index=self._data_container.GetFeatureName(), columns=['Coef'])
             df.to_csv(coef_path)
         except Exception as e:
             content = 'LDA with specific kernel does not give coef: '
@@ -199,21 +211,25 @@ def Save(self, store_path):
 
         super(LDA, self).Save(store_path)
 
+
 class RandomForest(Classifier):
     def __init__(self, **kwargs):
         super(RandomForest, self).__init__()
         if 'n_estimators' not in kwargs.keys():
-            super(RandomForest, self).SetModel(RandomForestClassifier(random_state=42, n_estimators=200, **kwargs))
+            super(RandomForest, self).SetModel(RandomForestClassifier(random_state=RANDOM_SEED[CLASSIFIER_RF],
+                                                                      n_estimators=200,
+                                                                      **kwargs))
         else:
-            super(RandomForest, self).SetModel(RandomForestClassifier(random_state=42, **kwargs))
+            super(RandomForest, self).SetModel(RandomForestClassifier(random_state=RANDOM_SEED[CLASSIFIER_RF],
+                                                                      **kwargs))
 
     def GetName(self):
-        return 'RF'
+        return CLASSIFIER_RF
 
     def GetDescription(self):
-        text = "We used random forest as the classifier. Random forest is an ensemble learning method which combining " \
-               "multiple decision trees at different subset of the training data set. Random forest is an effective " \
-               "method to avoid over-fitting. "
+        text = "We used random forest as the classifier. Random forest is an ensemble learning method which " \
+               "combining multiple decision trees at different subset of the training data set. Random forest " \
+               "is an effective method to avoid over-fitting. "
         return text
 
     def Predict(self, x, is_probability=True):
@@ -222,21 +238,22 @@ def Predict(self, x, is_probability=True):
         else:
             return super(RandomForest, self).Predict(x)
 
+
 class AE(Classifier):
     def __init__(self, **kwargs):
         super(AE, self).__init__()
-        if not 'early_stopping' in kwargs.keys():
+        if 'early_stopping' not in kwargs.keys():
             kwargs['early_stopping'] = True
-        super(AE, self).SetModel(MLPClassifier(random_state=42, **kwargs))
+        super(AE, self).SetModel(MLPClassifier(random_state=RANDOM_SEED[CLASSIFIER_AE], **kwargs))
 
     def GetName(self):
-        return 'AE'
+        return CLASSIFIER_AE
 
     def GetDescription(self):
-        text = "We used multi-layer perceptron (MLP), sometimes called auto-encoder (AE), as the classifier. MLP is based " \
-               "neural network with multi-hidden layers to find the mapping from inputted features to the label. Here " \
-               "we used 1 hidden layers with 100 hidden units. The non-linear activate function was rectified linear " \
-               "unit function and the optimizer was Adam with step 0.001. "
+        text = "We used multi-layer perceptron (MLP), sometimes called auto-encoder (AE), as the classifier. " \
+               "MLP is based neural network with multi-hidden layers to find the mapping from inputted features " \
+               "to the label. Here we used 1 hidden layers with 100 hidden units. The non-linear activate function " \
+               "was rectified linear unit function and the optimizer was Adam with step 0.001. "
         return text
 
     def Predict(self, x, is_probability=True):
@@ -245,18 +262,20 @@ def Predict(self, x, is_probability=True):
         else:
             return super(AE, self).Predict(x)
 
+
 class AdaBoost(Classifier):
     def __init__(self, **kwargs):
         super(AdaBoost, self).__init__()
-        super(AdaBoost, self).SetModel(AdaBoostClassifier(random_state=42, **kwargs))
+        super(AdaBoost, self).SetModel(AdaBoostClassifier(random_state=RANDOM_SEED[CLASSIFIER_AB], **kwargs))
 
     def GetName(self):
-        return 'AB'
+        return CLASSIFIER_AB
 
     def GetDescription(self):
-        text = "We used AdaBoost as the classifier. AdaBoost is a meta-algorithm that conjunct other type of algorithms " \
-               "and combine them to get a final output of boosted classifier. AdaBoost is sensitive to the noise and " \
-               "the outlier. Over-fitting can also be avoided by AdaBoost. Here we used decision tree as the base classifier. "
+        text = "We used AdaBoost as the classifier. AdaBoost is a meta-algorithm that conjunct other type of " \
+               "algorithms and combine them to get a final output of boosted classifier. AdaBoost is sensitive to " \
+               "the noise and the outlier. Over-fitting can also be avoided by AdaBoost. " \
+               "Here we used decision tree as the base classifier. "
         return text
 
     def Predict(self, x, is_probability=True):
@@ -265,17 +284,18 @@ def Predict(self, x, is_probability=True):
         else:
             return super(AdaBoost, self).Predict(x)
 
+
 class DecisionTree(Classifier):
     def __init__(self, **kwargs):
         super(DecisionTree, self).__init__()
-        super(DecisionTree, self).SetModel(DecisionTreeClassifier(random_state=42, **kwargs))
+        super(DecisionTree, self).SetModel(DecisionTreeClassifier(random_state=RANDOM_SEED[CLASSIFIER_DT], **kwargs))
 
     def GetName(self):
-        return 'DT'
+        return CLASSIFIER_DT
 
     def GetDescription(self):
-        text = "We used decision tree as the classifier. Decision tree is a non-parametric supervised learning method " \
-               "and can be used for classification with high interpretation. "
+        text = "We used decision tree as the classifier. Decision tree is a non-parametric supervised learning " \
+               "method and can be used for classification with high interpretation. "
         return text
 
     def Predict(self, x, is_probability=True):
@@ -284,13 +304,15 @@ def Predict(self, x, is_probability=True):
         else:
             return super(DecisionTree, self).Predict(x)
 
+
 class GaussianProcess(Classifier):
     def __init__(self, **kwargs):
         super(GaussianProcess, self).__init__()
-        super(GaussianProcess, self).SetModel(GaussianProcessClassifier(random_state=42, **kwargs))
+        super(GaussianProcess, self).SetModel(GaussianProcessClassifier(
+            random_state=RANDOM_SEED[CLASSIFIER_GP], **kwargs))
 
     def GetName(self):
-        return 'GP'
+        return CLASSIFIER_GP
 
     def GetDescription(self):
         text = "We used Gaussian process as the classifier. Gaussian process combines the features to build a joint " \
@@ -303,6 +325,7 @@ def Predict(self, x, is_probability=True):
         else:
             return super(GaussianProcess, self).Predict(x)
 
+
 class NaiveBayes(Classifier):
     def __init__(self, **kwargs):
         super(NaiveBayes, self).__init__()
@@ -312,8 +335,8 @@ def GetName(self):
         return 'NB'
 
     def GetDescription(self):
-        text = "We used naive Bayes as the classifier. Naive Bayes is a kind of probabilistic classifiers based on Bayes" \
-               "theorem. Naive Bayes requires  number of parameters linear in the number of features. "
+        text = "We used naive Bayes as the classifier. Naive Bayes is a kind of probabilistic classifiers " \
+               "based on Bayes theorem. Naive Bayes requires  number of parameters linear in the number of features. "
         return text
 
     def Predict(self, x, is_probability=True):
@@ -322,18 +345,18 @@ def Predict(self, x, is_probability=True):
         else:
             return super(NaiveBayes, self).Predict(x)
 
+
 class LR(Classifier):
     def __init__(self, **kwargs):
         super(LR, self).__init__()
         if 'solver' in kwargs.keys():
             super(LR, self).SetModel(LogisticRegression(penalty='none', **kwargs))
         else:
             super(LR, self).SetModel(LogisticRegression(penalty='none', solver='saga', tol=0.01,
-                                                        random_state=42, **kwargs))
-
+                                                        random_state=RANDOM_SEED[CLASSIFIER_LR], **kwargs))
 
     def GetName(self):
-        return 'LR'
+        return CLASSIFIER_LR
 
     def GetDescription(self):
         text = "We used logistic regression as the classifier. Logistic regression is a linear classifier that " \
@@ -354,7 +377,8 @@ def Save(self, store_path):
         # Save the coefficients
         try:
             coef_path = os.path.join(store_path, 'LR_coef.csv')
-            df = pd.DataFrame(data=np.transpose(self.GetModel().coef_), index=self._data_container.GetFeatureName(), columns=['Coef'])
+            df = pd.DataFrame(data=np.transpose(self.GetModel().coef_),
+                              index=self._data_container.GetFeatureName(), columns=['Coef'])
             df.to_csv(coef_path)
         except Exception as e:
             content = 'LR can not load coef: '
@@ -363,7 +387,8 @@ def Save(self, store_path):
 
         try:
             intercept_path = os.path.join(store_path, 'LR_intercept.csv')
-            intercept_df = pd.DataFrame(data=(self.GetModel().intercept_).reshape(1, 1), index=['intercept'], columns=['value'])
+            intercept_df = pd.DataFrame(data=self.GetModel().intercept_.reshape(1, 1),
+                                        index=['intercept'], columns=['value'])
             intercept_df.to_csv(intercept_path)
         except Exception as e:
             content = 'LR can not load intercept: '
@@ -372,20 +397,21 @@ def Save(self, store_path):
 
         super(LR, self).Save(store_path)
 
+
 class LRLasso(Classifier):
     def __init__(self, **kwargs):
         super(LRLasso, self).__init__()
         if 'solver' in kwargs.keys():
             super(LRLasso, self).SetModel(LogisticRegression(penalty='l1', **kwargs))
         else:
             super(LRLasso, self).SetModel(LogisticRegression(penalty='l1', solver='liblinear',
-                                                             random_state=42, **kwargs))
+                                                             random_state=RANDOM_SEED[CLASSIFIER_LRLasso], **kwargs))
 
     def GetName(self):
-        return 'LRLasso'
+        return CLASSIFIER_LRLasso
 
     def GetDescription(self):
-        text = "We used logistic regression with LASSO constrain as the classifier. Logistic regression with LASSON " \
+        text = "We used logistic regression with LASSO constrain as the classifier. Logistic regression with LASSO " \
                "constrain is a linear classifier based on logistic regression. L1 norm is added in the final lost " \
                "function and the weights was constrained, which make the features sparse. "
         return text
@@ -404,7 +430,8 @@ def Save(self, store_path):
         # Save the coefficients
         try:
             coef_path = os.path.join(store_path, 'LRLasso_coef.csv')
-            df = pd.DataFrame(data=np.transpose(self.GetModel().coef_), index=self._data_container.GetFeatureName(), columns=['Coef'])
+            df = pd.DataFrame(data=np.transpose(self.GetModel().coef_),
+                              index=self._data_container.GetFeatureName(), columns=['Coef'])
             df.to_csv(coef_path)
         except Exception as e:
             content = 'LASSO can not load coef: '
@@ -413,7 +440,8 @@ def Save(self, store_path):
 
         try:
             intercept_path = os.path.join(store_path, 'LRLasso_intercept.csv')
-            intercept_df = pd.DataFrame(data=(self.GetModel().intercept_).reshape(1, 1), index=['intercept'], columns=['value'])
+            intercept_df = pd.DataFrame(data=self.GetModel().intercept_.reshape(1, 1),
+                                        index=['intercept'], columns=['value'])
             intercept_df.to_csv(intercept_path)
         except Exception as e:
             content = 'LASSO can not load intercept: '
@@ -422,6 +450,7 @@ def Save(self, store_path):
 
         super(LRLasso, self).Save(store_path)
 
+
 if __name__ == '__main__':
     X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
     y = np.array([1, 1, 0, 0])
@@ -475,5 +504,3 @@ def Save(self, store_path):
     clf.SetData(X, y)
     clf.Fit()
     print(clf.GetName(), clf.Predict([[1, 1]]))
-
-