Skip to content

Commit

Permalink
Add the config files for the setting of random state.
Browse files Browse the repository at this point in the history
  • Loading branch information
salan668 committed May 27, 2020
1 parent 0fb166e commit aeada6c
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 65 deletions.
139 changes: 83 additions & 56 deletions FAE/FeatureAnalysis/Classifier.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
"""
All rights reserved.
-- Yang Song.
"""
import os
import pickle
from copy import deepcopy

import pandas as pd
import numpy as np
from abc import ABCMeta,abstractmethod
from abc import ABCMeta, abstractmethod
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
Expand All @@ -16,19 +20,22 @@

from FAE.DataContainer.DataContainer import DataContainer
from Utility.EcLog import eclog
from Utility.Constants import *
from FAE.HyperParameterConfig.HyperParamManager import RANDOM_SEED


class Classifier:
'''
"""
This is the base class of the classifier. All the specific classifiers need to inherit from this base class.
'''
"""
def __init__(self):
    """Initialise an empty classifier.

    Starts with no model, empty feature/label arrays, a fresh
    ``DataContainer`` and a logger obtained from ``eclog`` using this
    module's file name.
    """
    self.__model = None
    self._x, self._y = np.array([]), np.array([])
    self._data_container = DataContainer()
    # os.path.basename(p) is exactly os.path.split(p)[-1].
    module_file = os.path.basename(__file__)
    self.logger = eclog(module_file).GetLogger()

def __deepcopy__(self, memodict={}):
def __deepcopy__(self):
copy_classifier = type(self)()
copy_classifier._data_container = deepcopy(self._data_container)
copy_classifier._x, copy_classifier._y = deepcopy(self._x), deepcopy(self._y)
Expand Down Expand Up @@ -107,21 +114,22 @@ def Load(self, store_path):
def GetName(self):
pass


class SVM(Classifier):
def __init__(self, **kwargs):
    """Build an SVC-backed classifier.

    Keyword arguments are forwarded to ``sklearn.svm.SVC``. Values the
    caller omits are filled in: ``kernel='linear'``, ``C=1.0``,
    ``probability=True`` and ``random_state`` taken from
    ``RANDOM_SEED[CLASSIFIER_SVM]``.
    """
    super(SVM, self).__init__()
    kwargs.setdefault('kernel', 'linear')
    kwargs.setdefault('C', 1.0)
    kwargs.setdefault('probability', True)
    # Routing the seed through kwargs lets a caller pass its own
    # random_state without a "multiple values for keyword" TypeError.
    kwargs.setdefault('random_state', RANDOM_SEED[CLASSIFIER_SVM])
    super(SVM, self).SetModel(SVC(**kwargs))

    # Descriptive label built from the kernel and C actually in use.
    self.__name = 'SVM_' + kwargs['kernel'] + '_C_' + '{:.3f}'.format(kwargs['C'])

def GetName(self):
    """Return the identifier of this classifier (the ``CLASSIFIER_SVM`` constant)."""
    return CLASSIFIER_SVM

def Predict(self, x, is_probability=True):
if is_probability:
Expand All @@ -130,10 +138,10 @@ def Predict(self, x, is_probability=True):
return super(SVM, self).Predict(x)

def GetDescription(self):
    """Return the report paragraph describing the SVM classifier."""
    return (
        "We used support vector machine (SVM) as the classifier. SVM was an effective and robust classifier "
        "to build the model. The kernel function has the ability to map the features into a higher dimension "
        "to search the hyper-plane for separating the cases with different labels. Here we used the linear "
        "kernel function because it was easier to explain the coefficients of the features for the final model. "
    )

def Save(self, store_folder):
Expand All @@ -144,17 +152,19 @@ def Save(self, store_folder):
# Save the coefficients
try:
coef_path = os.path.join(store_folder, 'SVM_coef.csv')
df = pd.DataFrame(data=np.transpose(self.GetModel().coef_), index=self._data_container.GetFeatureName(), columns=['Coef'])
df = pd.DataFrame(data=np.transpose(self.GetModel().coef_),
index=self._data_container.GetFeatureName(), columns=['Coef'])
df.to_csv(coef_path)
except Exception as e:
content = 'SVM with specific kernel does not give coef: '
self.logger.error('{}{}'.format(content, str(e)))
print('{} \n{}'.format(content, e.__str__()))

#Save the intercept_
# Save the intercept_
try:
intercept_path = os.path.join(store_folder, 'SVM_intercept.csv')
intercept_df = pd.DataFrame(data=(self.GetModel().intercept_).reshape(1, 1), index=['intercept'], columns=['value'])
intercept_df = pd.DataFrame(data=self.GetModel().intercept_.reshape(1, 1),
index=['intercept'], columns=['value'])
intercept_df.to_csv(intercept_path)
except Exception as e:
content = 'SVM with specific kernel does not give intercept: '
Expand All @@ -163,6 +173,7 @@ def Save(self, store_folder):

super(SVM, self).Save(store_folder)


class LDA(Classifier):
def __init__(self, **kwargs):
super(LDA, self).__init__()
Expand Down Expand Up @@ -190,7 +201,8 @@ def Save(self, store_path):
# Save the coefficients
try:
coef_path = os.path.join(store_path, 'LDA_coef.csv')
df = pd.DataFrame(data=np.transpose(self.GetModel().coef_), index=self._data_container.GetFeatureName(), columns=['Coef'])
df = pd.DataFrame(data=np.transpose(self.GetModel().coef_),
index=self._data_container.GetFeatureName(), columns=['Coef'])
df.to_csv(coef_path)
except Exception as e:
content = 'LDA with specific kernel does not give coef: '
Expand All @@ -199,21 +211,25 @@ def Save(self, store_path):

super(LDA, self).Save(store_path)


class RandomForest(Classifier):
def __init__(self, **kwargs):
    """Build a random-forest classifier.

    Keyword arguments are forwarded to ``RandomForestClassifier``.
    ``n_estimators`` defaults to 200 and ``random_state`` to
    ``RANDOM_SEED[CLASSIFIER_RF]`` when the caller does not supply them.
    """
    super(RandomForest, self).__init__()
    # One construction path instead of two near-identical branches.
    kwargs.setdefault('n_estimators', 200)
    kwargs.setdefault('random_state', RANDOM_SEED[CLASSIFIER_RF])
    super(RandomForest, self).SetModel(RandomForestClassifier(**kwargs))

def GetName(self):
    """Return the identifier of this classifier (the ``CLASSIFIER_RF`` constant)."""
    return CLASSIFIER_RF

def GetDescription(self):
    """Return the report paragraph describing the random-forest classifier."""
    return (
        "We used random forest as the classifier. Random forest is an ensemble learning method which "
        "combining multiple decision trees at different subset of the training data set. Random forest "
        "is an effective method to avoid over-fitting. "
    )

def Predict(self, x, is_probability=True):
Expand All @@ -222,21 +238,22 @@ def Predict(self, x, is_probability=True):
else:
return super(RandomForest, self).Predict(x)


class AE(Classifier):
def __init__(self, **kwargs):
    """Build an MLP-backed classifier.

    Keyword arguments are forwarded to ``MLPClassifier``.
    ``early_stopping`` defaults to ``True`` and ``random_state`` to
    ``RANDOM_SEED[CLASSIFIER_AE]`` when the caller does not supply them.
    """
    super(AE, self).__init__()
    kwargs.setdefault('early_stopping', True)
    # Seed goes through kwargs so an explicit random_state from the caller
    # is honoured rather than raising a duplicate-keyword TypeError.
    kwargs.setdefault('random_state', RANDOM_SEED[CLASSIFIER_AE])
    super(AE, self).SetModel(MLPClassifier(**kwargs))

def GetName(self):
    """Return the identifier of this classifier (the ``CLASSIFIER_AE`` constant)."""
    return CLASSIFIER_AE

def GetDescription(self):
    """Return the report paragraph describing the MLP (AE) classifier."""
    return (
        "We used multi-layer perceptron (MLP), sometimes called auto-encoder (AE), as the classifier. "
        "MLP is based neural network with multi-hidden layers to find the mapping from inputted features "
        "to the label. Here we used 1 hidden layers with 100 hidden units. The non-linear activate function "
        "was rectified linear unit function and the optimizer was Adam with step 0.001. "
    )

def Predict(self, x, is_probability=True):
Expand All @@ -245,18 +262,20 @@ def Predict(self, x, is_probability=True):
else:
return super(AE, self).Predict(x)


class AdaBoost(Classifier):
def __init__(self, **kwargs):
    """Build an AdaBoost classifier.

    Keyword arguments are forwarded to ``AdaBoostClassifier``;
    ``random_state`` defaults to ``RANDOM_SEED[CLASSIFIER_AB]``.
    """
    super(AdaBoost, self).__init__()
    # Default the seed via kwargs so a caller-provided random_state wins
    # instead of colliding with a hard-wired keyword.
    kwargs.setdefault('random_state', RANDOM_SEED[CLASSIFIER_AB])
    super(AdaBoost, self).SetModel(AdaBoostClassifier(**kwargs))

def GetName(self):
    """Return the identifier of this classifier (the ``CLASSIFIER_AB`` constant)."""
    return CLASSIFIER_AB

def GetDescription(self):
    """Return the report paragraph describing the AdaBoost classifier."""
    return (
        "We used AdaBoost as the classifier. AdaBoost is a meta-algorithm that conjunct other type of "
        "algorithms and combine them to get a final output of boosted classifier. AdaBoost is sensitive to "
        "the noise and the outlier. Over-fitting can also be avoided by AdaBoost. "
        "Here we used decision tree as the base classifier. "
    )

def Predict(self, x, is_probability=True):
Expand All @@ -265,17 +284,18 @@ def Predict(self, x, is_probability=True):
else:
return super(AdaBoost, self).Predict(x)


class DecisionTree(Classifier):
def __init__(self, **kwargs):
    """Build a decision-tree classifier.

    Keyword arguments are forwarded to ``DecisionTreeClassifier``;
    ``random_state`` defaults to ``RANDOM_SEED[CLASSIFIER_DT]``.
    """
    super(DecisionTree, self).__init__()
    # Default the seed via kwargs so a caller-provided random_state wins
    # instead of colliding with a hard-wired keyword.
    kwargs.setdefault('random_state', RANDOM_SEED[CLASSIFIER_DT])
    super(DecisionTree, self).SetModel(DecisionTreeClassifier(**kwargs))

def GetName(self):
    """Return the identifier of this classifier (the ``CLASSIFIER_DT`` constant)."""
    return CLASSIFIER_DT

def GetDescription(self):
    """Return the report paragraph describing the decision-tree classifier."""
    return (
        "We used decision tree as the classifier. Decision tree is a non-parametric supervised learning "
        "method and can be used for classification with high interpretation. "
    )

def Predict(self, x, is_probability=True):
Expand All @@ -284,13 +304,15 @@ def Predict(self, x, is_probability=True):
else:
return super(DecisionTree, self).Predict(x)


class GaussianProcess(Classifier):
def __init__(self, **kwargs):
    """Build a Gaussian-process classifier.

    Keyword arguments are forwarded to ``GaussianProcessClassifier``;
    ``random_state`` defaults to ``RANDOM_SEED[CLASSIFIER_GP]``.
    """
    super(GaussianProcess, self).__init__()
    # Default the seed via kwargs so a caller-provided random_state wins
    # instead of colliding with a hard-wired keyword.
    kwargs.setdefault('random_state', RANDOM_SEED[CLASSIFIER_GP])
    super(GaussianProcess, self).SetModel(GaussianProcessClassifier(**kwargs))

def GetName(self):
    """Return the identifier of this classifier (the ``CLASSIFIER_GP`` constant)."""
    return CLASSIFIER_GP

def GetDescription(self):
text = "We used Gaussian process as the classifier. Gaussian process combines the features to build a joint " \
Expand All @@ -303,6 +325,7 @@ def Predict(self, x, is_probability=True):
else:
return super(GaussianProcess, self).Predict(x)


class NaiveBayes(Classifier):
def __init__(self, **kwargs):
super(NaiveBayes, self).__init__()
Expand All @@ -312,8 +335,8 @@ def GetName(self):
return 'NB'

def GetDescription(self):
    """Return the report paragraph describing the naive Bayes classifier."""
    # The fragments are split so that "Bayes" and "theorem" stay separated
    # by a space once the adjacent literals are concatenated.
    return (
        "We used naive Bayes as the classifier. Naive Bayes is a kind of probabilistic classifiers "
        "based on Bayes theorem. Naive Bayes requires number of parameters linear in the number of features. "
    )

def Predict(self, x, is_probability=True):
Expand All @@ -322,18 +345,18 @@ def Predict(self, x, is_probability=True):
else:
return super(NaiveBayes, self).Predict(x)


class LR(Classifier):
def __init__(self, **kwargs):
    """Build an unpenalised logistic-regression classifier.

    Keyword arguments are forwarded to ``LogisticRegression`` with
    ``penalty='none'``. ``random_state`` defaults to
    ``RANDOM_SEED[CLASSIFIER_LR]`` in every case. When the caller does not
    choose a ``solver``, ``'saga'`` is used and ``tol`` defaults to 0.01.
    """
    super(LR, self).__init__()
    # Seed regardless of who picked the solver: previously a caller-supplied
    # solver silently dropped the configured random state.
    kwargs.setdefault('random_state', RANDOM_SEED[CLASSIFIER_LR])
    if 'solver' not in kwargs:
        kwargs['solver'] = 'saga'
        # The looser tolerance only accompanies the default solver, as before.
        kwargs.setdefault('tol', 0.01)
    # NOTE(review): newer scikit-learn releases appear to spell this
    # penalty=None -- confirm against the pinned scikit-learn version.
    super(LR, self).SetModel(LogisticRegression(penalty='none', **kwargs))

def GetName(self):
    """Return the identifier of this classifier (the ``CLASSIFIER_LR`` constant)."""
    return CLASSIFIER_LR

def GetDescription(self):
text = "We used logistic regression as the classifier. Logistic regression is a linear classifier that " \
Expand All @@ -354,7 +377,8 @@ def Save(self, store_path):
# Save the coefficients
try:
coef_path = os.path.join(store_path, 'LR_coef.csv')
df = pd.DataFrame(data=np.transpose(self.GetModel().coef_), index=self._data_container.GetFeatureName(), columns=['Coef'])
df = pd.DataFrame(data=np.transpose(self.GetModel().coef_),
index=self._data_container.GetFeatureName(), columns=['Coef'])
df.to_csv(coef_path)
except Exception as e:
content = 'LR can not load coef: '
Expand All @@ -363,7 +387,8 @@ def Save(self, store_path):

try:
intercept_path = os.path.join(store_path, 'LR_intercept.csv')
intercept_df = pd.DataFrame(data=(self.GetModel().intercept_).reshape(1, 1), index=['intercept'], columns=['value'])
intercept_df = pd.DataFrame(data=self.GetModel().intercept_.reshape(1, 1),
index=['intercept'], columns=['value'])
intercept_df.to_csv(intercept_path)
except Exception as e:
content = 'LR can not load intercept: '
Expand All @@ -372,20 +397,21 @@ def Save(self, store_path):

super(LR, self).Save(store_path)


class LRLasso(Classifier):
def __init__(self, **kwargs):
    """Build an L1-penalised (LASSO) logistic-regression classifier.

    Keyword arguments are forwarded to ``LogisticRegression`` with
    ``penalty='l1'``. ``random_state`` defaults to
    ``RANDOM_SEED[CLASSIFIER_LRLasso]`` in every case, and ``solver``
    defaults to ``'liblinear'`` when the caller does not choose one.
    """
    super(LRLasso, self).__init__()
    # Seed regardless of who picked the solver: previously a caller-supplied
    # solver silently dropped the configured random state.
    kwargs.setdefault('random_state', RANDOM_SEED[CLASSIFIER_LRLasso])
    kwargs.setdefault('solver', 'liblinear')
    super(LRLasso, self).SetModel(LogisticRegression(penalty='l1', **kwargs))

def GetName(self):
    """Return the identifier of this classifier (the ``CLASSIFIER_LRLasso`` constant)."""
    return CLASSIFIER_LRLasso

def GetDescription(self):
    """Return the report paragraph describing the LASSO logistic-regression classifier."""
    return (
        "We used logistic regression with LASSO constrain as the classifier. Logistic regression with LASSO "
        "constrain is a linear classifier based on logistic regression. L1 norm is added in the final lost "
        "function and the weights was constrained, which make the features sparse. "
    )
Expand All @@ -404,7 +430,8 @@ def Save(self, store_path):
# Save the coefficients
try:
coef_path = os.path.join(store_path, 'LRLasso_coef.csv')
df = pd.DataFrame(data=np.transpose(self.GetModel().coef_), index=self._data_container.GetFeatureName(), columns=['Coef'])
df = pd.DataFrame(data=np.transpose(self.GetModel().coef_),
index=self._data_container.GetFeatureName(), columns=['Coef'])
df.to_csv(coef_path)
except Exception as e:
content = 'LASSO can not load coef: '
Expand All @@ -413,7 +440,8 @@ def Save(self, store_path):

try:
intercept_path = os.path.join(store_path, 'LRLasso_intercept.csv')
intercept_df = pd.DataFrame(data=(self.GetModel().intercept_).reshape(1, 1), index=['intercept'], columns=['value'])
intercept_df = pd.DataFrame(data=self.GetModel().intercept_.reshape(1, 1),
index=['intercept'], columns=['value'])
intercept_df.to_csv(intercept_path)
except Exception as e:
content = 'LASSO can not load intercept: '
Expand All @@ -422,6 +450,7 @@ def Save(self, store_path):

super(LRLasso, self).Save(store_path)


if __name__ == '__main__':
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 0, 0])
Expand Down Expand Up @@ -475,5 +504,3 @@ def Save(self, store_path):
clf.SetData(X, y)
clf.Fit()
print(clf.GetName(), clf.Predict([[1, 1]]))


Loading

0 comments on commit aeada6c

Please sign in to comment.