# regression.py (forked from lazyprogrammer/machine_learning_examples)
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# NOTE: some people using the default Python
# installation on Mac have had trouble with Axes3D
# Switching to Python 3 (brew install python3) or
# using Linux are both viable work-arounds
# generate and plot the data
N = 500
X = np.random.random((N, 2))*4 - 2 # in between (-2, +2)
Y = X[:,0]*X[:,1] # makes a saddle shape
# note: in this script "Y" will be the target,
# "Yhat" will be prediction
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:,0], X[:,1], Y)
plt.show()
# make a neural network and train it
D = 2
M = 100 # number of hidden units
# layer 1
W = np.random.randn(D, M) / np.sqrt(D)
b = np.zeros(M)
# layer 2
V = np.random.randn(M) / np.sqrt(M)
c = 0
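# Taken together, these parameters define a one-hidden-layer network
# with a scalar output per sample:
#   Yhat = relu(X.dot(W) + b).dot(V) + c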
# how to get the output
# consider the params global
def forward(X):
    Z = X.dot(W) + b
    Z = Z * (Z > 0) # relu
    # Z = np.tanh(Z)
    Yhat = Z.dot(V) + c
    return Z, Yhat
# how to train the params
def derivative_V(Z, Y, Yhat):
    return (Y - Yhat).dot(Z)

def derivative_c(Y, Yhat):
    return (Y - Yhat).sum()

def derivative_W(X, Z, Y, Yhat, V):
    # dZ = np.outer(Y - Yhat, V) * (1 - Z * Z) # this is for tanh activation
    dZ = np.outer(Y - Yhat, V) * (Z > 0) # this is for relu activation
    return X.T.dot(dZ)

def derivative_b(Z, Y, Yhat, V):
    # dZ = np.outer(Y - Yhat, V) * (1 - Z * Z) # this is for tanh activation
    dZ = np.outer(Y - Yhat, V) * (Z > 0) # this is for relu activation
    return dZ.sum(axis=0)
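# Where these formulas come from: for the squared-error cost
#   J = 0.5 * sum_n (Y[n] - Yhat[n])**2
# backpropagation gives dJ/dV = -(Y - Yhat).dot(Z) and so on, with
# dZ = np.outer(Y - Yhat, V) * (Z > 0) playing the role of the error
# propagated back through the relu. The functions above return the
# *negative* gradients, which is why update() below adds them to the
# parameters; this is still ordinary gradient descent.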
def update(X, Z, Y, Yhat, W, b, V, c, learning_rate=1e-4):
    gV = derivative_V(Z, Y, Yhat)
    gc = derivative_c(Y, Yhat)
    gW = derivative_W(X, Z, Y, Yhat, V)
    gb = derivative_b(Z, Y, Yhat, V)

    V += learning_rate*gV
    c += learning_rate*gc
    W += learning_rate*gW
    b += learning_rate*gb

    return W, b, V, c
# so we can plot the costs later
def get_cost(Y, Yhat):
    return ((Y - Yhat)**2).mean()
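# Note: get_cost() reports the mean squared error for monitoring, while
# the derivative functions use the unscaled sum over samples; the missing
# constant factor is simply absorbed into the learning rate.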
# run a training loop
# plot the costs
# and plot the final result
costs = []
for i in range(200):
    Z, Yhat = forward(X)
    W, b, V, c = update(X, Z, Y, Yhat, W, b, V, c)
    cost = get_cost(Y, Yhat)
    costs.append(cost)
    if i % 25 == 0:
        print(cost)
# plot the costs
plt.plot(costs)
plt.show()
# plot the prediction with the data
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:,0], X[:,1], Y)
# surface plot
line = np.linspace(-2, 2, 20)
xx, yy = np.meshgrid(line, line)
Xgrid = np.vstack((xx.flatten(), yy.flatten())).T
_, Yhat = forward(Xgrid)
ax.plot_trisurf(Xgrid[:,0], Xgrid[:,1], Yhat, linewidth=0.2, antialiased=True)
plt.show()
# plot magnitude of residuals
Ygrid = Xgrid[:,0]*Xgrid[:,1]
R = np.abs(Ygrid - Yhat)
plt.scatter(Xgrid[:,0], Xgrid[:,1], c=R)
plt.show()
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_trisurf(Xgrid[:,0], Xgrid[:,1], R, linewidth=0.2, antialiased=True)
plt.show()