Skip to content

Commit

Permalink
tpot
Browse files Browse the repository at this point in the history
  • Loading branch information
davidgtang committed Jan 30, 2017
1 parent b35e87b commit 22bdc81
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 69 deletions.
107 changes: 39 additions & 68 deletions CarlosFuerte/ML_Submission.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"metadata": {
"collapsed": false
},
Expand Down Expand Up @@ -159,7 +159,7 @@
"max 84.400000 8.094000 2.000000 1.000000 "
]
},
"execution_count": 3,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -193,7 +193,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 2,
"metadata": {
"collapsed": false
},
Expand Down Expand Up @@ -242,15 +242,31 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"outputs": [
{
"ename": "ImportError",
"evalue": "cannot import name '_safe_split'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-3-528f8f7500fb>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpreprocessing\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_selection\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mtpot\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mTPOTClassifier\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpipeline\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mmake_pipeline\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmake_union\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensemble\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mExtraTreesClassifier\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mVotingClassifier\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/dgt377/anaconda/lib/python3.5/site-packages/sklearn/model_selection/__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0m_split\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mcheck_cv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0m_validation\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 18\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0m_validation\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mcross_val_predict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0m_validation\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mlearning_curve\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/Users/dgt377/anaconda/lib/python3.5/site-packages/sklearn/model_selection/_validation.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfixes\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mastype\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalidation\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0m_is_arraylike\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_num_samples\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetaestimators\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0m_safe_split\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexternals\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjoblib\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mParallel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdelayed\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetrics\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscorer\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mcheck_scoring\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mImportError\u001b[0m: cannot import name '_safe_split'"
]
}
],
"source": [
"from sklearn import preprocessing\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier\n",
"from tpot import TPOTClassifier \n",
"from sklearn.pipeline import make_pipeline, make_union\n",
"from sklearn.ensemble import ExtraTreesClassifier, VotingClassifier\n",
"\n",
"scaler = preprocessing.StandardScaler().fit(feature_vectors)\n",
"scaled_features = scaler.transform(feature_vectors)\n",
Expand All @@ -268,47 +284,18 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Pred SS CSiS FSiS SiSh MS WS D PS BS Total\n",
" True\n",
" SS 44 9 1 54\n",
" CSiS 1 178 23 1 1 1 205\n",
" FSiS 3 26 119 1 3 152\n",
" SiSh 40 1 8 3 52\n",
" MS 1 4 36 9 1 3 54\n",
" WS 1 1 7 13 75 1 13 1 112\n",
" D 1 1 22 3 2 29\n",
" PS 1 3 5 4 12 2 102 2 131\n",
" BS 41 41\n",
"Facies classification accuracy (NN) = 0.791566\n",
"Facies classification accuracy (OneVsRest) = 0.801205\n",
"Adjacent facies classification accuracy (NN) = 0.936145\n",
"Adjacent facies classification accuracy (OneVsRest) = 0.948193\n"
]
}
],
"outputs": [],
"source": [
"from sklearn.neural_network import MLPClassifier\n",
"\n",
"sizes = (200,100,100)\n",
"clfNN = MLPClassifier(solver='lbfgs', alpha=.015,\n",
" hidden_layer_sizes=sizes, random_state=15)\n",
"clfOne = OneVsRestClassifier(MLPClassifier(solver='lbfgs', alpha=.015,\n",
" hidden_layer_sizes=sizes, random_state=15), n_jobs = -1)\n",
"# Use extra trees\n",
"clfExtra = make_pipeline(\n",
" ExtraTreesClassifier(criterion=\"gini\", max_features=0.53, n_estimators=500))\n",
"clfExtra.fit(X_train, y_train)\n",
"\n",
"clfNN.fit(X_train,y_train)\n",
"clfOne.fit(X_train,y_train)\n",
"\n",
"predicted_NN = clfNN.predict(X_test)\n",
"predicted_One = clfOne.predict(X_test)\n",
"predicted = clfExtra.predict(X_test)\n",
"\n",
"from sklearn.metrics import confusion_matrix\n",
"from classification_utilities import display_cm, display_adj_cm\n",
Expand Down Expand Up @@ -353,34 +340,14 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"OneVsRestClassifier(estimator=MLPClassifier(activation='relu', alpha=0.015, batch_size='auto', beta_1=0.9,\n",
" beta_2=0.999, early_stopping=False, epsilon=1e-08,\n",
" hidden_layer_sizes=(200, 100, 100), learning_rate='constant',\n",
" learning_rate_init=0.001, max_iter=200, momentum=0.9,\n",
" nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,\n",
" solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,\n",
" warm_start=False),\n",
" n_jobs=-1)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"clf_final = OneVsRestClassifier(MLPClassifier(solver='lbfgs', alpha=.015,\n",
" hidden_layer_sizes=sizes, random_state=1),n_jobs = -1)\n",
"\n",
"clf_final.fit(scaled_features,correct_facies_labels)"
"# Retrain on all data\n",
"clfExtra.fit(scaled_features, correct_facies_labels)"
]
},
{
Expand All @@ -400,7 +367,11 @@
"source": [
"# Normalize data\n",
"scaled_validation = scaler.transform(validation)\n",
"validation_output = clf_final.predict(scaled_validation)"
"validation_output = clfExtra.predict(scaled_validation)\n",
"\n",
"from scipy.signal import medfilt\n",
"# Smooth data\n",
"validation_output = medfilt(validation_output,kernel_size = 3)"
]
},
{
Expand Down Expand Up @@ -488,7 +459,7 @@
},
"outputs": [],
"source": [
"validationFull.to_csv('TangDarnell.csv')"
"validationFull.to_csv('TangDarnell_sub3.csv')"
]
}
],
Expand Down
1 change: 0 additions & 1 deletion CarlosFuerte/ML_Submission.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# coding: utf-8

# # Machine Learning Contest
Expand Down
19 changes: 19 additions & 0 deletions CarlosFuerte/tpot_mnist_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import numpy as np

from sklearn.ensemble import ExtraTreesClassifier, VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import FunctionTransformer

# NOTE: Make sure that the class is labeled 'class' in the data file.
# 'PATH/TO/DATA/FILE' and 'COLUMN_SEPARATOR' are template placeholders and
# must be replaced with the real data path and delimiter before running.
#
# np.recfromcsv is deprecated and no longer available in NumPy 2.0, so the
# file is loaded with np.genfromtxt using the same defaults recfromcsv
# applied: the first row supplies the field names and they are lower-cased.
tpot_data = np.genfromtxt(
    'PATH/TO/DATA/FILE',
    delimiter='COLUMN_SEPARATOR',
    dtype=np.float64,
    names=True,
    case_sensitive='lower',
)

# View the structured array as a plain 2-D float64 matrix (one row per
# record) and drop the column holding the 'class' labels, leaving only the
# feature columns.
class_column = tpot_data.dtype.names.index('class')
features = np.delete(
    tpot_data.view(np.float64).reshape(tpot_data.size, -1),
    class_column,
    axis=1,
)

# Split into training and testing sets; random_state is fixed so the split
# is reproducible between runs.
training_features, testing_features, training_classes, testing_classes = (
    train_test_split(features, tpot_data['class'], random_state=42)
)

# Single-step pipeline wrapping an extra-trees classifier.
exported_pipeline = make_pipeline(
    ExtraTreesClassifier(criterion="gini", max_features=0.53, n_estimators=500)
)

# Fit on the training split and predict classes for the testing split.
exported_pipeline.fit(training_features, training_classes)
results = exported_pipeline.predict(testing_features)
Binary file added CarlosFuerte/y_pred_Matt.npy
Binary file not shown.

0 comments on commit 22bdc81

Please sign in to comment.