forked from salan668/FAE
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add the description of the main classes and the main functions. This …
…make the code readable.
- Loading branch information
Showing
12 changed files
with
150 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,8 @@ | ||
'''. | ||
Jun 17, 2018. | ||
Yang SONG, [email protected] | ||
''' | ||
|
||
import numpy as np | ||
import os | ||
import pandas as pd | ||
|
@@ -6,6 +11,10 @@ | |
|
||
|
||
class DataContainer: | ||
''' | ||
DataContainer is the key class of the FAP project. It is the node to connect different models. Almost all processors | ||
accept DataContainer and return a new DataContainer. | ||
''' | ||
def __init__(self, array=np.array([]), label=np.array([]), feature_name=[], case_name=[]): | ||
self.__feature_name = feature_name | ||
self.__case_name = case_name | ||
|
@@ -121,6 +130,12 @@ def UsualAndL2Normalize(self, store_path='', axis=0): | |
df.to_csv(store_path) | ||
|
||
def ArtefactNormalize(self, normalization_file): | ||
''' | ||
This function uses an existing file that contains the information of the normalization. It is usually used when | ||
a learnt model is used to process the testing data set. | ||
:param normalization_file: the stored file with the information of the normalization. | ||
:return: | ||
''' | ||
df = pd.read_csv(normalization_file, header=0, index_col=0) | ||
mean_value = df.loc['mean'].values | ||
std_value = df.loc['std'].values | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,16 @@ | ||
'''. | ||
Jun 17, 2018. | ||
Yang SONG, [email protected] | ||
''' | ||
|
||
import numpy as np | ||
from random import shuffle | ||
import os | ||
import pandas as pd | ||
|
||
|
||
from FAP.DataContainer.DataContainer import DataContainer | ||
|
||
def SeperateDataToTrainingAndTesting(data, percentage=0.2, label=np.array(()), training_index_list = [], store_folder=''): | ||
def SeperateDataToTrainingAndTesting(data, testing_percentage=0.2, label=np.array(()), training_index_list = [], store_folder=''): | ||
is_label = True | ||
if label.size == 0: | ||
label = np.zeros((data.shape[0]), ) | ||
|
@@ -19,8 +23,8 @@ def SeperateDataToTrainingAndTesting(data, percentage=0.2, label=np.array(()), t | |
index = np.where(label == group)[0] | ||
|
||
shuffle(index) | ||
testing_index = index[:round(len(index) * percentage)] | ||
training_index = index[round(len(index) * percentage):] | ||
testing_index = index[:round(len(index) * testing_percentage)] | ||
training_index = index[round(len(index) * testing_percentage):] | ||
|
||
training_index_list.extend(training_index) | ||
testing_index_list.extend(testing_index) | ||
|
@@ -55,7 +59,16 @@ def SeperateDataToTrainingAndTesting(data, percentage=0.2, label=np.array(()), t | |
'training_index': training_index_list, | ||
'testing_index': testing_index_list} | ||
|
||
def GenerateTrainingAndTestingData(csv_file_path, training_index=[], percentage=0.3, is_store_index=False): | ||
def GenerateTrainingAndTestingData(csv_file_path, training_index=[], testing_percentage=0.3, is_store_index=False): | ||
''' | ||
Separate the data container into the training part and the testing part. | ||
:param csv_file_path: The file path of the data container | ||
:param training_index: The index of the training data set. This is usually used to compare different combinations | ||
of the sequences. Default is [] | ||
:param testing_percentage: The percentage of the data set that is separated out as the testing data set. Default is 30% | ||
:param is_store_index: To store or not. Default is False. | ||
:return: | ||
''' | ||
data_container = DataContainer() | ||
data, label, feature_name, case_name = data_container.LoadAndGetData(csv_file_path) | ||
folder_path = os.path.split(csv_file_path)[0] | ||
|
@@ -73,7 +86,7 @@ def GenerateTrainingAndTestingData(csv_file_path, training_index=[], percentage= | |
else: | ||
store_folder = '' | ||
|
||
output = SeperateDataToTrainingAndTesting(data, percentage, label, training_index_list=training_index, store_folder=store_folder) | ||
output = SeperateDataToTrainingAndTesting(data, testing_percentage, label, training_index_list=training_index, store_folder=store_folder) | ||
|
||
training_data_contrainer = DataContainer(output['training_data'], output['training_label'], feature_name, | ||
[case_name[temp] for temp in output['training_index']]) | ||
|
@@ -84,9 +97,8 @@ def GenerateTrainingAndTestingData(csv_file_path, training_index=[], percentage= | |
testing_data_contrainer.Save(os.path.join(testing_folder, 'numeric_feature.csv')) | ||
|
||
|
||
# Demo | ||
if __name__ == '__main__': | ||
GenerateTrainingAndTestingData(r'C:\MyCode\PythonScript\EyeEnt\lymphoma_MM\T1C_T2\T1_T2', percentage=0.3) | ||
GenerateTrainingAndTestingData(r'C:\MyCode\PythonScript\EyeEnt\lymphoma_MM\T1C_T2\T1_T2', testing_percentage=0.3) | ||
training_index = pd.read_csv(r'C:\MyCode\PythonScript\EyeEnt\lymphoma_MM\T1C_T2\T1_T2\training\training_index.csv') | ||
training_index = training_index.values[:, 1].tolist() | ||
GenerateTrainingAndTestingData(r'C:\MyCode\PythonScript\EyeEnt\lymphoma_MM\T1C_T2\T1', training_index=training_index) | ||
|
@@ -97,4 +109,4 @@ def GenerateTrainingAndTestingData(csv_file_path, training_index=[], percentage= | |
training_index = pd.read_csv(r'C:\MyCode\PythonScript\EyeEnt\lymphoma_MM\T1C_T2\T1\training\training_index.csv') | ||
print(training_index.values[:, 1].tolist()) | ||
training_index = pd.read_csv(r'C:\MyCode\PythonScript\EyeEnt\lymphoma_MM\T1C_T2\T2\training\training_index.csv') | ||
print(training_index.values[:, 1].tolist()) | ||
print(training_index.values[:, 1].tolist()) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,8 @@ | ||
'''. | ||
Jun 17, 2018. | ||
Yang SONG, [email protected] | ||
''' | ||
|
||
from FAP.DataContainer.DataContainer import DataContainer | ||
from FAP.FeatureAnalysis.CrossValidation import CrossValidation, CrossValidationOnFeatureNumber | ||
from FAP.FeatureAnalysis.FeatureSelector import * | ||
|
@@ -6,23 +11,37 @@ | |
import pandas as pd | ||
|
||
class FeatureAnalysisExplore: | ||
''' | ||
This is the input of the FAP project. It accepts the candidate feature selector list and the candidate classifier | ||
list. Then the results of the metrics are stored for each combination of feature selector and classifier. | ||
''' | ||
def __init__(self, feature_selector_list=[], classifier_list=[], | ||
cv=CrossValidationOnFeatureNumber('5-folder'), max_feature_number=1): | ||
self.__feature_selector_list = feature_selector_list | ||
self.__classifier_list = classifier_list | ||
self.__cv = cv | ||
self.__max_feature_number = max_feature_number | ||
|
||
def RunOneModel(self, data_container, feature_selector, classifier, cv, store_folder=''): | ||
def RunOneModel(self, data_container, feature_selector, classifier, cv, test_data_container=DataContainer(), store_folder=''): | ||
''' | ||
:param data_container: The implement of the DataContainer. | ||
:param feature_selector: The implement of the FeatureSelector. | ||
:param classifier: The implement of the Classifier | ||
:param cv: The implement of the CrossValidation | ||
:param store_folder: The path of the store folder. | ||
:return: The metric of the validation data. | ||
''' | ||
feature_selector.SetDataContainer(data_container) | ||
selected_data_container = feature_selector.Run(store_folder) | ||
|
||
cv.SetClassifier(classifier) | ||
cv.SetDataContainer(selected_data_container) | ||
|
||
train_metric, val_metric = cv.Run() | ||
train_metric, val_metric, test_metric = cv.Run(data_container, test_data_container=test_data_container, | ||
store_folder=store_folder) | ||
|
||
return val_metric | ||
return val_metric, test_metric | ||
|
||
def Run(self, data_container, test_data_container=DataContainer(), store_folder=''): | ||
|
||
|
@@ -75,11 +94,6 @@ def Run(self, data_container, test_data_container=DataContainer(), store_folder= | |
data_container.Load(r'..\tempResult\NumericFeature.csv') | ||
data_container.UsualNormalize() | ||
|
||
column_list = ['sample_number', 'positive_number', 'negative_number', | ||
'auc', 'auc 95% CIs', 'accuracy', 'feature_number', | ||
'Yorden Index', 'sensitivity', 'specificity', | ||
'positive predictive value', 'negative predictive value'] | ||
|
||
df = pd.DataFrame(columns=column_list) | ||
|
||
# Set Feature Selector List | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,11 +9,10 @@ | |
def DrawROCList(pred_list, label_list, name_list='', store_path=''): | ||
''' | ||
To Draw the ROC curve. | ||
:param pred_list: The list of the prediction. | ||
:param label_list: The list of the label. | ||
:param name_list: The list of the legend name. | ||
:param store_path: The store path. Support jpeg and tif. | ||
:param store_path: The store path. Support jpg and eps. | ||
:return: None | ||
Apr-28-18, Yang SONG [[email protected]] | ||
|
Oops, something went wrong.