From 30229c087b08172c39618dc50249a51e70f3ceff Mon Sep 17 00:00:00 2001
From: Alexhaoge <1810064@mail.nankai.edu.cn>
Date: Mon, 28 Dec 2020 00:47:02 +0800
Subject: [PATCH] fine tune svm & update gitignore

---
 .gitignore                          |  1 +
 MLSR/primary.py                     | 30 ++++++++++++++-------
 log/svm/2020_12_27_22_25_19.log.txt | 42 ++++++++++++++++++++++++++++-
 3 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/.gitignore b/.gitignore
index 4590222..38bc907 100644
--- a/.gitignore
+++ b/.gitignore
@@ -135,4 +135,5 @@ dmypy.json
 .pyre/
 
 nohup.out
+*.nohup.out
 /.vscode/
\ No newline at end of file
diff --git a/MLSR/primary.py b/MLSR/primary.py
index 9e95189..9e72fbc 100644
--- a/MLSR/primary.py
+++ b/MLSR/primary.py
@@ -10,10 +10,10 @@
 from time import strftime, localtime
 from .data import DataSet
 from .plot import plot_confusion_matrix
-from os import mkdir
 
 
-def lower_bound(cv_results):
+@DeprecationWarning
+def lower_bound(cv_results: dict):
     """
     Calculate the lower bound within 1 standard deviation
     of the best `mean_test_scores`.
@@ -34,7 +34,8 @@ def lower_bound(cv_results):
             - cv_results['std_test_score'][best_score_idx])
 
 
-def best_low_complexity(cv_results):
+@DeprecationWarning
+def best_low_complexity(cv_results: dict):
     """
     Balance model complexity with cross-validated score.
     Author: Wenhao Zhang <wenhaoz@ucla.edu>
@@ -203,16 +204,25 @@ def do_svm(dataset: DataSet, log_dir: str = '../log', grid: dict = None):
     """
     from sklearn.svm import SVC
     if grid is None:
+        # rough grid
+        # grid = {
+        #     'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
+        #     'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100],
+        #     'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'],
+        #     'SVM__degree': [3, 5],
+        #     'SVM__decision_function_shape': ['ovo', 'ovr'],
+        #     'SVM__class_weight': [None, 'balanced'],
+        #     'SVM__max_iter': [-1, 300],
+        #     'SVM__break_ties': [True, False],
+        #     'SVM__shrinking': [True, False]
+        # }
+        # fine grid
         grid = {
-            'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
-            'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100],
-            'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'],
-            'SVM__degree': [3, 5],
+            'SVM__kernel': ['linear', 'rbf', 'poly'],
+            'SVM__C': [0.7, 0.8, 0.9, 0.95, 1, 1.05, 1.1, 1.2, 1.5, 2],
+            'SVM__degree': [2, 3, 4],
             'SVM__decision_function_shape': ['ovo', 'ovr'],
-            'SVM__class_weight': [None, 'balanced'],
-            'SVM__max_iter': [-1, 300],
             'SVM__break_ties': [True, False],
-            'SVM__shrinking': [True, False]
         }
     pipe = Pipeline([
         ('scaler', MinMaxScaler()),
diff --git a/log/svm/2020_12_27_22_25_19.log.txt b/log/svm/2020_12_27_22_25_19.log.txt
index 54e1187..7f1d1bd 100644
--- a/log/svm/2020_12_27_22_25_19.log.txt
+++ b/log/svm/2020_12_27_22_25_19.log.txt
@@ -1,4 +1,44 @@
-{'cv': 5, 'error_score': nan, 'estimator__memory': None, 'estimator__steps': [('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))], 'estimator__verbose': False, 'estimator__scaler': MinMaxScaler(), 'estimator__SVM': SVC(cache_size=500), 'estimator__scaler__clip': False, 'estimator__scaler__copy': True, 'estimator__scaler__feature_range': (0, 1), 'estimator__SVM__C': 1.0, 'estimator__SVM__break_ties': False, 'estimator__SVM__cache_size': 500, 'estimator__SVM__class_weight': None, 'estimator__SVM__coef0': 0.0, 'estimator__SVM__decision_function_shape': 'ovr', 'estimator__SVM__degree': 3, 'estimator__SVM__gamma': 'scale', 'estimator__SVM__kernel': 'rbf', 'estimator__SVM__max_iter': -1, 'estimator__SVM__probability': False, 'estimator__SVM__random_state': None, 'estimator__SVM__shrinking': True, 'estimator__SVM__tol': 0.001, 'estimator__SVM__verbose': False, 'estimator': Pipeline(steps=[('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))]), 'n_jobs': -1, 'param_grid': {'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'], 'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100], 'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'], 'SVM__degree': [3, 5], 'SVM__decision_function_shape': ['ovo', 'ovr'], 'SVM__class_weight': [None, 'balanced'], 'SVM__max_iter': [-1, 300], 'SVM__break_ties': [True, False], 'SVM__shrinking': [True, False]}, 'pre_dispatch': '2*n_jobs', 'refit': 'f1', 'return_train_score': False, 'scoring': {'f1': 'f1_macro', 'accuracy': 'accuracy'}, 'verbose': 2}
+{'cv': 5, 'error_score': nan, 'estimator__memory': None,
+ 'estimator__steps': [('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))], 
+ 'estimator__verbose': False, 
+ 'estimator__scaler': MinMaxScaler(), 
+ 'estimator__SVM': SVC(cache_size=500), 
+ 'estimator__scaler__clip': False, 
+ 'estimator__scaler__copy': True, 
+ 'estimator__scaler__feature_range': (0, 1), 
+ 'estimator__SVM__C': 1.0, 
+ 'estimator__SVM__break_ties': False, 
+ 'estimator__SVM__cache_size': 500, 
+ 'estimator__SVM__class_weight': None, 
+ 'estimator__SVM__coef0': 0.0, 
+ 'estimator__SVM__decision_function_shape': 'ovr', 
+ 'estimator__SVM__degree': 3, 
+ 'estimator__SVM__gamma': 'scale', 
+ 'estimator__SVM__kernel': 'rbf', 
+ 'estimator__SVM__max_iter': -1, 
+ 'estimator__SVM__probability': False, 
+ 'estimator__SVM__random_state': None, 
+ 'estimator__SVM__shrinking': True, 
+ 'estimator__SVM__tol': 0.001, 
+ 'estimator__SVM__verbose': False, 
+ 'estimator': Pipeline(steps=[('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))]), 
+ 'n_jobs': -1, 
+ 'param_grid': {
+    'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'], 
+    'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100], 
+    'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'], 
+    'SVM__degree': [3, 5], 
+    'SVM__decision_function_shape': ['ovo', 'ovr'], 
+    'SVM__class_weight': [None, 'balanced'], 
+    'SVM__max_iter': [-1, 300], 
+    'SVM__break_ties': [True, False], 
+    'SVM__shrinking': [True, False]}, 
+ 'pre_dispatch': '2*n_jobs',
+ 'refit': 'f1',
+ 'return_train_score': False,
+ 'scoring': {'f1': 'f1_macro', 'accuracy': 'accuracy'},
+ 'verbose': 2
+ }
 Best score on training set by grid search cross validation: 0.7672520801145016
 Accuracy on test set: 0.7044317369549679
 F1-score on test set: 0.7709042033053236