# -*- coding: utf-8 -*-
#Created by Parth Patel, DBI @ University of Delaware, Newark, Delaware 19717
#Date created: 12/03/2017
##This script builds a Random Forest classifier and performs a grid search for hyperparameter tuning (optimization).
#usage: python3 HyperParameterTuning.py
import sys
from time import time

import numpy as np
import pandas as pd
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
InputFile = 'Features.csv'
OutputFile = "Performance_Report.txt"  # note: defined but not written to; the report below goes to stdout
FeatureSet = pd.read_csv(InputFile, sep=',')
print(FeatureSet.head())
header = FeatureSet.columns.tolist()
cols = header[:-1]
colsRes = ['class']
Features = FeatureSet[cols].values           # training array - put features 1-1372 here with their headings
Feature_labels = FeatureSet[colsRes].values  # training labels - put the class value here
Feature_labels = Feature_labels.ravel()      # flatten (n_samples, 1) -> (n_samples,)
X, y = Features, Feature_labels
print("Step 1: dataset is loaded")
clf = RandomForestClassifier(criterion="entropy")
# Utility function to report best scores
def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")
# use a full grid over all parameters
param_grid = {"max_depth": [3, 4, 5, 6, 7, 8, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [2, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "n_estimators": [10, 20, 50, 100, 150, 200]}
# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid)
start = time()
grid_search.fit(X, y)
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
% (time() - start, len(grid_search.cv_results_['params'])))
report(grid_search.cv_results_)
sys.exit()
#----------------------------------------------------------------------------
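# Optional sketch (not part of the original run): RandomizedSearchCV and sp_randint
# are imported above but never used. Assuming the same X, y, clf, and report()
# defined earlier, a randomized search over comparable parameter distributions
# could look like the commented example below. The param_dist ranges and
# n_iter=20 are illustrative assumptions, not values from the original script;
# to try it, uncomment these lines and remove the sys.exit() above.
#
# param_dist = {"max_depth": [3, 4, 5, 6, 7, 8, None],
#               "max_features": sp_randint(1, 11),
#               "min_samples_split": sp_randint(2, 11),
#               "min_samples_leaf": sp_randint(1, 11),
#               "bootstrap": [True, False],
#               "n_estimators": sp_randint(10, 201)}
# random_search = RandomizedSearchCV(clf, param_distributions=param_dist, n_iter=20)
# start = time()
# random_search.fit(X, y)
# print("RandomizedSearchCV took %.2f seconds for %d candidate parameter settings."
#       % (time() - start, len(random_search.cv_results_['params'])))
# report(random_search.cv_results_)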