From 30229c087b08172c39618dc50249a51e70f3ceff Mon Sep 17 00:00:00 2001 From: Alexhaoge <1810064@mail.nankai.edu.cn> Date: Mon, 28 Dec 2020 00:47:02 +0800 Subject: [PATCH] fine tune svm & update gitignore --- .gitignore | 1 + MLSR/primary.py | 30 ++++++++++++++------- log/svm/2020_12_27_22_25_19.log.txt | 42 ++++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 4590222..38bc907 100644 --- a/.gitignore +++ b/.gitignore @@ -135,4 +135,5 @@ dmypy.json .pyre/ nohup.out +*.nohup.out /.vscode/ \ No newline at end of file diff --git a/MLSR/primary.py b/MLSR/primary.py index 9e95189..9e72fbc 100644 --- a/MLSR/primary.py +++ b/MLSR/primary.py @@ -10,10 +10,10 @@ from time import strftime, localtime from .data import DataSet from .plot import plot_confusion_matrix -from os import mkdir -def lower_bound(cv_results): +@DeprecationWarning +def lower_bound(cv_results: dict): """ Calculate the lower bound within 1 standard deviation of the best `mean_test_scores`. @@ -34,7 +34,8 @@ def lower_bound(cv_results): - cv_results['std_test_score'][best_score_idx]) -def best_low_complexity(cv_results): +@DeprecationWarning +def best_low_complexity(cv_results: dict): """ Balance model complexity with cross-validated score. Author: Wenhao Zhang @@ -203,16 +204,25 @@ def do_svm(dataset: DataSet, log_dir: str = '../log', grid: dict = None): """ from sklearn.svm import SVC if grid is None: + # rough grid + # grid = { + # 'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'], + # 'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100], + # 'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'], + # 'SVM__degree': [3, 5], + # 'SVM__decision_function_shape': ['ovo', 'ovr'], + # 'SVM__class_weight': [None, 'balanced'], + # 'SVM__max_iter': [-1, 300], + # 'SVM__break_ties': [True, False], + # 'SVM__shrinking': [True, False] + # } + # fine grid grid = { - 'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'], - 'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100], - 'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'], - 'SVM__degree': [3, 5], + 'SVM__kernel': ['linear', 'rbf', 'poly'], + 'SVM__C': [0.7, 0.8, 0.9, 0.95, 1, 1.05, 1.1, 1.2, 1.5, 2], + 'SVM__degree': [2, 3, 4], 'SVM__decision_function_shape': ['ovo', 'ovr'], - 'SVM__class_weight': [None, 'balanced'], - 'SVM__max_iter': [-1, 300], 'SVM__break_ties': [True, False], - 'SVM__shrinking': [True, False] } pipe = Pipeline([ ('scaler', MinMaxScaler()), diff --git a/log/svm/2020_12_27_22_25_19.log.txt b/log/svm/2020_12_27_22_25_19.log.txt index 54e1187..7f1d1bd 100644 --- a/log/svm/2020_12_27_22_25_19.log.txt +++ b/log/svm/2020_12_27_22_25_19.log.txt @@ -1,4 +1,44 @@ -{'cv': 5, 'error_score': nan, 'estimator__memory': None, 'estimator__steps': [('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))], 'estimator__verbose': False, 'estimator__scaler': MinMaxScaler(), 'estimator__SVM': SVC(cache_size=500), 'estimator__scaler__clip': False, 'estimator__scaler__copy': True, 'estimator__scaler__feature_range': (0, 1), 'estimator__SVM__C': 1.0, 'estimator__SVM__break_ties': False, 'estimator__SVM__cache_size': 500, 'estimator__SVM__class_weight': None, 'estimator__SVM__coef0': 0.0, 'estimator__SVM__decision_function_shape': 'ovr', 'estimator__SVM__degree': 3, 'estimator__SVM__gamma': 'scale', 'estimator__SVM__kernel': 'rbf', 'estimator__SVM__max_iter': -1, 'estimator__SVM__probability': False, 'estimator__SVM__random_state': None, 'estimator__SVM__shrinking': True, 'estimator__SVM__tol': 0.001, 'estimator__SVM__verbose': False, 'estimator': Pipeline(steps=[('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))]), 'n_jobs': -1, 'param_grid': {'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'], 'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100], 'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'], 'SVM__degree': [3, 5], 'SVM__decision_function_shape': ['ovo', 'ovr'], 'SVM__class_weight': [None, 'balanced'], 'SVM__max_iter': [-1, 300], 'SVM__break_ties': [True, False], 'SVM__shrinking': [True, False]}, 'pre_dispatch': '2*n_jobs', 'refit': 'f1', 'return_train_score': False, 'scoring': {'f1': 'f1_macro', 'accuracy': 'accuracy'}, 'verbose': 2} +{'cv': 5, 'error_score': nan, 'estimator__memory': None, + 'estimator__steps': [('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))], + 'estimator__verbose': False, + 'estimator__scaler': MinMaxScaler(), + 'estimator__SVM': SVC(cache_size=500), + 'estimator__scaler__clip': False, + 'estimator__scaler__copy': True, + 'estimator__scaler__feature_range': (0, 1), + 'estimator__SVM__C': 1.0, + 'estimator__SVM__break_ties': False, + 'estimator__SVM__cache_size': 500, + 'estimator__SVM__class_weight': None, + 'estimator__SVM__coef0': 0.0, + 'estimator__SVM__decision_function_shape': 'ovr', + 'estimator__SVM__degree': 3, + 'estimator__SVM__gamma': 'scale', + 'estimator__SVM__kernel': 'rbf', + 'estimator__SVM__max_iter': -1, + 'estimator__SVM__probability': False, + 'estimator__SVM__random_state': None, + 'estimator__SVM__shrinking': True, + 'estimator__SVM__tol': 0.001, + 'estimator__SVM__verbose': False, + 'estimator': Pipeline(steps=[('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))]), + 'n_jobs': -1, + 'param_grid': { + 'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'], + 'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100], + 'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'], + 'SVM__degree': [3, 5], + 'SVM__decision_function_shape': ['ovo', 'ovr'], + 'SVM__class_weight': [None, 'balanced'], + 'SVM__max_iter': [-1, 300], + 'SVM__break_ties': [True, False], + 'SVM__shrinking': [True, False]}, + 'pre_dispatch': '2*n_jobs', + 'refit': 'f1', + 'return_train_score': False, + 'scoring': {'f1': 'f1_macro', 'accuracy': 'accuracy'}, + 'verbose': 2 + } Best score on training set by grid search cross validation: 0.7672520801145016 Accuracy on test set: 0.7044317369549679 F1-score on test set: 0.7709042033053236