Skip to content

Commit

Permalink
modified rf-python3.6.py
Browse files Browse the repository at this point in the history
  • Loading branch information
xuehuachunsheng committed May 18, 2018
1 parent 7a08e0b commit 40be623
Showing 1 changed file with 16 additions and 20 deletions.
36 changes: 16 additions & 20 deletions src/python/getting-started/digit-recognizer/rf-python3.6.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,45 @@
Author: 平淡的天
Github: https://github.com/apachecn/kaggle
'''

import os.path
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import pandas as pd
# from sklearn.grid_search import GridSearchCV
# from numpy import arange
# from lightgbm import LGBMClassifier
data_dir = \
r'/Users/wuyanxue/Documents/GitHub/datasets/getting-started/digit-recognizer/'

train_data = pd.read_csv(r"C:\Users\312\Desktop\digit-recognizer\train.csv")
test_data = pd.read_csv(r"C:\Users\312\Desktop\digit-recognizer\test.csv")
train_data = pd.read_csv(os.path.join(data_dir, 'input/train.csv'))
test_data = pd.read_csv(os.path.join(data_dir, 'input/test.csv'))
data = pd.concat([train_data, test_data], axis=0).reset_index(drop=True)
data.drop(['label'], axis=1, inplace=True)
label = train_data.label

pca = PCA(n_components=100, random_state=34)
data_pca = pca.fit_transform(data)

Xtrain, Ytrain, xtest, ytest = train_test_split(
X_train, X_test, y_train, y_test = train_test_split(
data_pca[0:len(train_data)], label, test_size=0.1, random_state=34)

clf = RandomForestClassifier(
n_estimators=110,
max_depth=5,
min_samples_split=2,
n_estimators=100,
max_depth=20,
min_samples_split=20,
min_samples_leaf=1,
random_state=34)

# clf=LGBMClassifier(num_leaves=63, max_depth=7, n_estimators=80, n_jobs=20)

# param_test1 = {'n_estimators':arange(10,150,10),'max_depth':arange(1,11,1)}
# gsearch1 = GridSearchCV(estimator = clf, param_grid = param_test1, scoring='accuracy',iid=False,cv=5)
# gsearch1.fit(Xtrain,xtest)
# print(gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_)

clf.fit(Xtrain, xtest)
y_predict = clf.predict(Ytrain)
clf.fit(X_train, y_train)
y_predict = clf.predict(X_test)

zeroLable = ytest - y_predict
zeroLable = y_test - y_predict
rightCount = 0
for i in range(len(zeroLable)):
if list(zeroLable)[i] == 0:
Expand All @@ -53,12 +53,8 @@

result = clf.predict(data_pca[len(train_data):])

i = 0
fw = open("C:\\Users\\312\\Desktop\\digit-recognizer\\result.csv", 'w')
with open('C:\\Users\\312\\Desktop\\digit-recognizer\\sample_submission.csv'
) as pred_file:
n, _ = test_data.shape
with open(os.path.join(data_dir, 'output/Result_sklearn_RF.csv'), 'w') as fw:
fw.write('{},{}\n'.format('ImageId', 'Label'))
for line in pred_file.readlines()[1:]:
splits = line.strip().split(',')
fw.write('{},{}\n'.format(splits[0], result[i]))
i += 1
for i in range(1, n + 1):
fw.write('{},{}\n'.format(i, result[i - 1]))

0 comments on commit 40be623

Please sign in to comment.