Skip to content

Commit

Permalink
Fine-tune SVM hyperparameter grid & update .gitignore
Browse files: browse the repository at this point in the history
  • Loading branch information
Alexhaoge committed Dec 27, 2020
1 parent a16bc11 commit 30229c0
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 11 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,5 @@ dmypy.json
.pyre/

nohup.out
*.nohup.out
/.vscode/
30 changes: 20 additions & 10 deletions MLSR/primary.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from time import strftime, localtime
from .data import DataSet
from .plot import plot_confusion_matrix
from os import mkdir


def lower_bound(cv_results):
@DeprecationWarning
def lower_bound(cv_results: dict):
"""
Calculate the lower bound within 1 standard deviation
of the best `mean_test_scores`.
Expand All @@ -34,7 +34,8 @@ def lower_bound(cv_results):
- cv_results['std_test_score'][best_score_idx])


def best_low_complexity(cv_results):
@DeprecationWarning
def best_low_complexity(cv_results: dict):
"""
Balance model complexity with cross-validated score.
Author: Wenhao Zhang <[email protected]>
Expand Down Expand Up @@ -203,16 +204,25 @@ def do_svm(dataset: DataSet, log_dir: str = '../log', grid: dict = None):
"""
from sklearn.svm import SVC
if grid is None:
# rough grid
# grid = {
# 'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
# 'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100],
# 'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'],
# 'SVM__degree': [3, 5],
# 'SVM__decision_function_shape': ['ovo', 'ovr'],
# 'SVM__class_weight': [None, 'balanced'],
# 'SVM__max_iter': [-1, 300],
# 'SVM__break_ties': [True, False],
# 'SVM__shrinking': [True, False]
# }
# fine grid
grid = {
'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100],
'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'],
'SVM__degree': [3, 5],
'SVM__kernel': ['linear', 'rbf', 'poly'],
'SVM__C': [0.7, 0.8, 0.9, 0.95, 1, 1.05, 1.1, 1.2, 1.5, 2],
'SVM__degree': [2, 3, 4],
'SVM__decision_function_shape': ['ovo', 'ovr'],
'SVM__class_weight': [None, 'balanced'],
'SVM__max_iter': [-1, 300],
'SVM__break_ties': [True, False],
'SVM__shrinking': [True, False]
}
pipe = Pipeline([
('scaler', MinMaxScaler()),
Expand Down
42 changes: 41 additions & 1 deletion log/svm/2020_12_27_22_25_19.log.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,44 @@
{'cv': 5, 'error_score': nan, 'estimator__memory': None, 'estimator__steps': [('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))], 'estimator__verbose': False, 'estimator__scaler': MinMaxScaler(), 'estimator__SVM': SVC(cache_size=500), 'estimator__scaler__clip': False, 'estimator__scaler__copy': True, 'estimator__scaler__feature_range': (0, 1), 'estimator__SVM__C': 1.0, 'estimator__SVM__break_ties': False, 'estimator__SVM__cache_size': 500, 'estimator__SVM__class_weight': None, 'estimator__SVM__coef0': 0.0, 'estimator__SVM__decision_function_shape': 'ovr', 'estimator__SVM__degree': 3, 'estimator__SVM__gamma': 'scale', 'estimator__SVM__kernel': 'rbf', 'estimator__SVM__max_iter': -1, 'estimator__SVM__probability': False, 'estimator__SVM__random_state': None, 'estimator__SVM__shrinking': True, 'estimator__SVM__tol': 0.001, 'estimator__SVM__verbose': False, 'estimator': Pipeline(steps=[('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))]), 'n_jobs': -1, 'param_grid': {'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'], 'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100], 'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'], 'SVM__degree': [3, 5], 'SVM__decision_function_shape': ['ovo', 'ovr'], 'SVM__class_weight': [None, 'balanced'], 'SVM__max_iter': [-1, 300], 'SVM__break_ties': [True, False], 'SVM__shrinking': [True, False]}, 'pre_dispatch': '2*n_jobs', 'refit': 'f1', 'return_train_score': False, 'scoring': {'f1': 'f1_macro', 'accuracy': 'accuracy'}, 'verbose': 2}
{'cv': 5, 'error_score': nan, 'estimator__memory': None,
'estimator__steps': [('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))],
'estimator__verbose': False,
'estimator__scaler': MinMaxScaler(),
'estimator__SVM': SVC(cache_size=500),
'estimator__scaler__clip': False,
'estimator__scaler__copy': True,
'estimator__scaler__feature_range': (0, 1),
'estimator__SVM__C': 1.0,
'estimator__SVM__break_ties': False,
'estimator__SVM__cache_size': 500,
'estimator__SVM__class_weight': None,
'estimator__SVM__coef0': 0.0,
'estimator__SVM__decision_function_shape': 'ovr',
'estimator__SVM__degree': 3,
'estimator__SVM__gamma': 'scale',
'estimator__SVM__kernel': 'rbf',
'estimator__SVM__max_iter': -1,
'estimator__SVM__probability': False,
'estimator__SVM__random_state': None,
'estimator__SVM__shrinking': True,
'estimator__SVM__tol': 0.001,
'estimator__SVM__verbose': False,
'estimator': Pipeline(steps=[('scaler', MinMaxScaler()), ('SVM', SVC(cache_size=500))]),
'n_jobs': -1,
'param_grid': {
'SVM__kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
'SVM__C': [0.01, 0.1, 0.5, 1, 5, 10, 100],
'SVM__gamma': [0.0001, 0.001, 0.01, 'scale', 'auto'],
'SVM__degree': [3, 5],
'SVM__decision_function_shape': ['ovo', 'ovr'],
'SVM__class_weight': [None, 'balanced'],
'SVM__max_iter': [-1, 300],
'SVM__break_ties': [True, False],
'SVM__shrinking': [True, False]},
'pre_dispatch': '2*n_jobs',
'refit': 'f1',
'return_train_score': False,
'scoring': {'f1': 'f1_macro', 'accuracy': 'accuracy'},
'verbose': 2
}
Best score on training set by grid search cross validation: 0.7672520801145016
Accuracy on test set: 0.7044317369549679
F1-score on test set: 0.7709042033053236
Expand Down

0 comments on commit 30229c0

Please sign in to comment.