1
+ {
2
+ "cells" : [
3
+ {
4
+ "cell_type" : " code" ,
5
+ "source" : [
6
+ " # Import needed libraries\n " ,
7
+ " import numpy as np\n " ,
8
+ " from sklearn.preprocessing import StandardScaler\n " ,
9
+ " from sklearn.pipeline import Pipeline\n " ,
10
+ " from sklearn.linear_model import SGDClassifier"
11
+ ],
12
+ "outputs" : [],
13
+ "execution_count" : 1 ,
14
+ "metadata" : {}
15
+ },
16
+ {
17
+ "cell_type" : " code" ,
18
+ "source" : [
19
+ "# Read the four CSV splits, skipping each file's header row\n",
20
+ "X_train, X_test, y_train, y_test = (\n",
21
+ "    np.genfromtxt(csv_path, delimiter=',', skip_header=1)\n",
22
+ "    for csv_path in ('X_train.csv', 'X_test.csv', 'y_train.csv', 'y_test.csv')\n",
23
+ ")"
24
+ ],
25
+ "outputs" : [],
26
+ "execution_count" : 2 ,
27
+ "metadata" : {}
28
+ },
29
+ {
30
+ "cell_type" : " code" ,
31
+ "source" : [
32
+ "# Seed the SGD shuffling so a fresh kernel run reproduces the same fit.\n",
33
+ "# NOTE: scikit-learn >= 1.1 renames loss='log' to 'log_loss'; update when upgrading.\n",
34
+ "classifiers = [SGDClassifier(penalty='l2', loss='log', alpha=0.0001, random_state=42)]"
35
+ ],
36
+ "outputs" : [],
37
+ "execution_count" : 4 ,
38
+ "metadata" : {
39
+ "collapsed" : false ,
40
+ "jupyter" : {
41
+ "source_hidden" : false ,
42
+ "outputs_hidden" : false
43
+ },
44
+ "nteract" : {
45
+ "transient" : {
46
+ "deleting" : false
47
+ }
48
+ }
49
+ }
50
+ },
51
+ {
52
+ "cell_type" : " code" ,
53
+ "source" : [],
54
+ "outputs" : [],
55
+ "execution_count" : 4 ,
56
+ "metadata" : {
57
+ "collapsed" : false ,
58
+ "jupyter" : {
59
+ "source_hidden" : false ,
60
+ "outputs_hidden" : false
61
+ },
62
+ "nteract" : {
63
+ "transient" : {
64
+ "deleting" : false
65
+ }
66
+ }
67
+ }
68
+ },
69
+ {
70
+ "cell_type" : " code" ,
71
+ "source" : [
72
+ "def score_model(X_train, y_train, X_test, y_test, model):\n",
73
+ "    \"\"\"Fit `model` behind a StandardScaler, print train/test scores, and return them.\"\"\"\n",
74
+ "    model_name = model.__class__.__name__\n",
75
+ "    # Use a separate name for the pipeline so the `model` argument is not rebound.\n",
76
+ "    pipeline = Pipeline(steps=[('scale_x', StandardScaler()), ('clf', model)])\n",
77
+ "    pipeline.fit(X_train, y_train)\n",
78
+ "\n",
79
+ "    train_score = pipeline.score(X_train, y_train)\n",
80
+ "    test_score = pipeline.score(X_test, y_test)\n",
81
+ "    print(f'Train score for {model_name}: ', round(train_score, 4))\n",
82
+ "    print(f'Test score for {model_name}: ', round(test_score, 4))\n",
83
+ "    print()\n",
84
+ "    return train_score, test_score"
85
+ ],
86
+ "outputs" : [],
87
+ "execution_count" : 5 ,
88
+ "metadata" : {
89
+ "collapsed" : false ,
90
+ "jupyter" : {
91
+ "source_hidden" : false ,
92
+ "outputs_hidden" : false
93
+ },
94
+ "nteract" : {
95
+ "transient" : {
96
+ "deleting" : false
97
+ }
98
+ }
99
+ }
100
+ },
101
+ {
102
+ "cell_type" : " code" ,
103
+ "source" : [],
104
+ "outputs" : [],
105
+ "execution_count" : 5 ,
106
+ "metadata" : {
107
+ "collapsed" : false ,
108
+ "jupyter" : {
109
+ "source_hidden" : false ,
110
+ "outputs_hidden" : false
111
+ },
112
+ "nteract" : {
113
+ "transient" : {
114
+ "deleting" : false
115
+ }
116
+ }
117
+ }
118
+ },
119
+ {
120
+ "cell_type" : " code" ,
121
+ "source" : [
122
+ "for clf in classifiers:  # score every candidate classifier in turn\n",
123
+ "    score_model(X_train, y_train, X_test, y_test, model=clf)"
124
+ ],
125
+ "outputs" : [
126
+ {
127
+ "output_type" : " stream" ,
128
+ "name" : " stdout" ,
129
+ "text" : [
130
+ " Train score for SGDClassifier: 0.9791\n " ,
131
+ " Test score for SGDClassifier: 0.9807\n " ,
132
+ " \n "
133
+ ]
134
+ }
135
+ ],
136
+ "execution_count" : 6 ,
137
+ "metadata" : {
138
+ "collapsed" : false ,
139
+ "jupyter" : {
140
+ "source_hidden" : false ,
141
+ "outputs_hidden" : false
142
+ },
143
+ "nteract" : {
144
+ "transient" : {
145
+ "deleting" : false
146
+ }
147
+ }
148
+ }
149
+ },
150
+ {
151
+ "cell_type" : " code" ,
152
+ "source" : [],
153
+ "outputs" : [],
154
+ "execution_count" : null ,
155
+ "metadata" : {
156
+ "collapsed" : false ,
157
+ "jupyter" : {
158
+ "source_hidden" : false ,
159
+ "outputs_hidden" : false
160
+ },
161
+ "nteract" : {
162
+ "transient" : {
163
+ "deleting" : false
164
+ }
165
+ }
166
+ }
167
+ },
168
+ {
169
+ "cell_type" : " code" ,
170
+ "source" : [],
171
+ "outputs" : [],
172
+ "execution_count" : null ,
173
+ "metadata" : {
174
+ "collapsed" : false ,
175
+ "jupyter" : {
176
+ "source_hidden" : false ,
177
+ "outputs_hidden" : false
178
+ },
179
+ "nteract" : {
180
+ "transient" : {
181
+ "deleting" : false
182
+ }
183
+ }
184
+ }
185
+ },
186
+ {
187
+ "cell_type" : " code" ,
188
+ "source" : [],
189
+ "outputs" : [],
190
+ "execution_count" : null ,
191
+ "metadata" : {
192
+ "collapsed" : false ,
193
+ "jupyter" : {
194
+ "source_hidden" : false ,
195
+ "outputs_hidden" : false
196
+ },
197
+ "nteract" : {
198
+ "transient" : {
199
+ "deleting" : false
200
+ }
201
+ }
202
+ }
203
+ },
204
+ {
205
+ "cell_type" : " code" ,
206
+ "source" : [],
207
+ "outputs" : [],
208
+ "execution_count" : null ,
209
+ "metadata" : {
210
+ "collapsed" : false ,
211
+ "jupyter" : {
212
+ "source_hidden" : false ,
213
+ "outputs_hidden" : false
214
+ },
215
+ "nteract" : {
216
+ "transient" : {
217
+ "deleting" : false
218
+ }
219
+ }
220
+ }
221
+ },
222
+ {
223
+ "cell_type" : " code" ,
224
+ "source" : [],
225
+ "outputs" : [],
226
+ "execution_count" : null ,
227
+ "metadata" : {
228
+ "collapsed" : false ,
229
+ "jupyter" : {
230
+ "source_hidden" : false ,
231
+ "outputs_hidden" : false
232
+ },
233
+ "nteract" : {
234
+ "transient" : {
235
+ "deleting" : false
236
+ }
237
+ }
238
+ }
239
+ },
240
+ {
241
+ "cell_type" : " code" ,
242
+ "source" : [],
243
+ "outputs" : [],
244
+ "execution_count" : null ,
245
+ "metadata" : {
246
+ "collapsed" : false ,
247
+ "jupyter" : {
248
+ "source_hidden" : false ,
249
+ "outputs_hidden" : false
250
+ },
251
+ "nteract" : {
252
+ "transient" : {
253
+ "deleting" : false
254
+ }
255
+ }
256
+ }
257
+ },
258
+ {
259
+ "cell_type" : " code" ,
260
+ "source" : [],
261
+ "outputs" : [],
262
+ "execution_count" : null ,
263
+ "metadata" : {
264
+ "collapsed" : false ,
265
+ "jupyter" : {
266
+ "source_hidden" : false ,
267
+ "outputs_hidden" : false
268
+ },
269
+ "nteract" : {
270
+ "transient" : {
271
+ "deleting" : false
272
+ }
273
+ }
274
+ }
275
+ }
276
+ ],
277
+ "metadata" : {
278
+ "kernelspec" : {
279
+ "display_name" : " Python 3" ,
280
+ "language" : " python" ,
281
+ "name" : " python3"
282
+ },
283
+ "language_info" : {
284
+ "name" : " python" ,
285
+ "version" : " 3.7.6" ,
286
+ "mimetype" : " text/x-python" ,
287
+ "codemirror_mode" : {
288
+ "name" : " ipython" ,
289
+ "version" : 3
290
+ },
291
+ "pygments_lexer" : " ipython3" ,
292
+ "nbconvert_exporter" : " python" ,
293
+ "file_extension" : " .py"
294
+ },
295
+ "nteract" : {
296
+ "version" : " 0.21.0"
297
+ }
298
+ },
299
+ "nbformat" : 4 ,
300
+ "nbformat_minor" : 4
301
+ }
0 commit comments