@@ -43,7 +43,7 @@ def neuralNetwork(input_layer_size,hidden_layer_size,out_put_layer):
initial_nn_params = np.vstack((initial_Theta1.reshape(-1,1), initial_Theta2.reshape(-1,1)))   # unroll the thetas into a single column vector
#np.savetxt("testTheta.csv",initial_nn_params,delimiter=",")
start = time.time()
- result = optimize.fmin_cg(nnCostFunction, initial_nn_params, fprime=nnGradient, args=(input_layer_size,hidden_layer_size,out_put_layer,X,y,Lambda))
+ result = optimize.fmin_cg(nnCostFunction, initial_nn_params, fprime=nnGradient, args=(input_layer_size,hidden_layer_size,out_put_layer,X,y,Lambda), maxiter=100)
print(u'Execution time:', time.time()-start)
print(result)
'''Visualize Theta1'''
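The only change in this hunk caps the conjugate-gradient run at 100 iterations. For reference, a minimal self-contained sketch of calling scipy.optimize.fmin_cg with maxiter; the quadratic objective and its gradient below are illustrative placeholders, not the script's nnCostFunction/nnGradient.

import numpy as np
from scipy import optimize

# Placeholder objective: 0.5*||A.theta - b||^2 (stands in for nnCostFunction)
def cost(theta, A, b):
    r = A.dot(theta) - b
    return 0.5 * r.dot(r)

# Its analytic gradient (stands in for nnGradient)
def grad(theta, A, b):
    return A.T.dot(A.dot(theta) - b)

A = np.array([[2.0, 0.0], [0.0, 1.0]])
b = np.array([1.0, 3.0])
theta0 = np.zeros(2)

# maxiter=100 caps the number of CG iterations, trading accuracy for runtime
theta_opt = optimize.fmin_cg(cost, theta0, fprime=grad, args=(A, b), maxiter=100)
print(theta_opt)   # approximately [0.5, 3.0]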
@@ -124,14 +124,16 @@ def nnCostFunction(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,L
h = sigmoid(z3)
'''Cost'''
J = -(np.dot(np.transpose(class_y.reshape(-1,1)), np.log(h.reshape(-1,1))) + np.dot(np.transpose(1-class_y.reshape(-1,1)), np.log(1-h.reshape(-1,1))) - Lambda*term/2) / m
-
+ #temp1 = (h.reshape(-1,1)-class_y.reshape(-1,1))
+ #temp2 = (temp1**2).sum()
+ #J = 1/(2*m)*temp2
return np.ravel(J)

# Gradient
def nnGradient(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambda):
length = nn_params.shape[0]
- Theta1 = nn_params[0:hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size,input_layer_size+1)
- Theta2 = nn_params[hidden_layer_size*(input_layer_size+1):length].reshape(num_labels,hidden_layer_size+1)
+ Theta1 = nn_params[0:hidden_layer_size*(input_layer_size+1)].reshape(hidden_layer_size,input_layer_size+1).copy()   # use copy() here, otherwise modifying Theta below would also modify nn_params
+ Theta2 = nn_params[hidden_layer_size*(input_layer_size+1):length].reshape(num_labels,hidden_layer_size+1).copy()
m = X.shape[0]
class_y = np.zeros((m,num_labels))      # the labels y are 0-9 and must be mapped to 0/1 indicator vectors
# map y
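The "map y" step that follows builds the 0/1 matrix class_y. A minimal sketch of an equivalent mapping, assuming y is an integer label array; the np.eye indexing shown here is just one common way to produce the same indicator matrix.

import numpy as np

# Hypothetical small label vector; in the real code y holds digits 0-9 for m examples
y = np.array([0, 2, 1, 2])
num_labels = 3

# Each row of class_y is a 0/1 indicator vector for the corresponding label
class_y = np.eye(num_labels)[y.ravel().astype(int)]
print(class_y)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]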
@@ -146,9 +148,8 @@ def nnGradient(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambd

Theta1_grad = np.zeros((Theta1.shape))    # weights from layer 1 to layer 2
Theta2_grad = np.zeros((Theta2.shape))    # weights from layer 2 to layer 3
-
- Theta1[:,0] = 0;
- Theta2[:,0] = 0;
+
+
'''Forward propagation: a column of ones (the bias) must be prepended at each layer'''
a1 = np.hstack((np.ones((m,1)), X))
z2 = np.dot(a1, np.transpose(Theta1))
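For orientation, a self-contained sketch of the forward pass that both nnCostFunction and nnGradient perform (bias column prepended at each layer, sigmoid activations); the sizes and random weights below are placeholders, not values from the script.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

m, n_in, n_hid, n_out = 5, 4, 3, 2           # toy sizes
rng = np.random.default_rng(0)
X = rng.normal(size=(m, n_in))
Theta1 = rng.normal(size=(n_hid, n_in + 1))  # placeholder weights (incl. bias column)
Theta2 = rng.normal(size=(n_out, n_hid + 1))

a1 = np.hstack((np.ones((m, 1)), X))                 # prepend bias column
z2 = np.dot(a1, Theta1.T)
a2 = np.hstack((np.ones((m, 1)), sigmoid(z2)))       # prepend bias column again
z3 = np.dot(a2, Theta2.T)
h = sigmoid(z3)                                      # (m, n_out) network outputs
print(h.shape)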
@@ -157,15 +158,19 @@ def nnGradient(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambd
z3 = np.dot(a2, np.transpose(Theta2))
h = sigmoid(z3)

+
'''Backpropagation: delta is the error term'''
delta3 = np.zeros((m,num_labels))
delta2 = np.zeros((m,hidden_layer_size))
for i in range(m):
- delta3[i,:] = h[i,:]-class_y[i,:]
+ #delta3[i,:] = (h[i,:]-class_y[i,:])*sigmoidGradient(z3[i,:])   # error term for the mean-squared-error cost
+ delta3[i,:] = h[i,:]-class_y[i,:]   # error term for the cross-entropy cost
Theta2_grad = Theta2_grad + np.dot(np.transpose(delta3[i,:].reshape(1,-1)), a2[i,:].reshape(1,-1))
delta2[i,:] = np.dot(delta3[i,:].reshape(1,-1), Theta2_x) * sigmoidGradient(z2[i,:])
Theta1_grad = Theta1_grad + np.dot(np.transpose(delta2[i,:].reshape(1,-1)), a1[i,:].reshape(1,-1))

+ Theta1[:,0] = 0
+ Theta2[:,0] = 0
'''Gradient'''
grad = (np.vstack((Theta1_grad.reshape(-1,1), Theta2_grad.reshape(-1,1))) + Lambda*np.vstack((Theta1.reshape(-1,1), Theta2.reshape(-1,1)))) / m
return np.ravel(grad)
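The fix in this hunk moves the zeroing of the bias columns to after the backpropagation loop: zeroing Theta1[:,0] and Theta2[:,0] before the forward pass (as the old code did) corrupts the activations, while placing it here only makes the regularization term skip the bias weights. A minimal sketch of that idea on a single weight matrix, with placeholder values:

import numpy as np

# Placeholder weight matrix and its accumulated (unregularized) gradient
Theta = np.arange(6.0).reshape(2, 3)   # first column corresponds to the bias
Theta_grad = np.ones_like(Theta)
m, Lambda = 4, 1.0

# Zero the bias column only in the copy used for the penalty,
# so regularization never touches the bias weights
reg = Theta.copy()
reg[:, 0] = 0
grad = (Theta_grad + Lambda * reg) / m
print(grad)   # bias-column entries are just Theta_grad/m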
@@ -223,6 +228,7 @@ def checkGradient(Lambda = 0):
step[i] = 0
# display the two columns side by side for comparison
res = np.hstack((num_grad.reshape(-1,1), grad.reshape(-1,1)))
+ print("Gradient check results: the first column is from the numerical method, the second from backpropagation:")
print(res)

# initialize the debug theta weights
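checkGradient compares a numerical estimate against the backpropagation gradient. A self-contained sketch of that central-difference check, using a simple quadratic cost as a stand-in for the network's cost function:

import numpy as np

def cost(theta):
    return 0.5 * np.sum(theta ** 2)    # stand-in cost; its analytic gradient is theta

def analytic_grad(theta):
    return theta

theta = np.array([1.0, -2.0, 0.5])
eps = 1e-4
num_grad = np.zeros_like(theta)
for i in range(theta.size):
    step = np.zeros_like(theta)
    step[i] = eps
    # central difference approximates dJ/dtheta_i
    num_grad[i] = (cost(theta + step) - cost(theta - step)) / (2 * eps)

# The two columns should agree to roughly 1e-8 relative error
print(np.hstack((num_grad.reshape(-1, 1), analytic_grad(theta).reshape(-1, 1))))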
@@ -256,5 +262,5 @@ def predict(Theta1,Theta2,X):
return p

if __name__ == "__main__":
- # checkGradient()
+ checkGradient()
neuralNetwork(400, 25, 10)
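To close the loop, a minimal sketch of reshaping an unrolled parameter vector back into Theta1/Theta2 and predicting with argmax over the output units, as predict does; the layer sizes follow the neuralNetwork(400, 25, 10) call above, and the random vector stands in for the optimizer's result.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

input_layer_size, hidden_layer_size, num_labels = 400, 25, 10
rng = np.random.default_rng(1)
# Placeholder for the unrolled parameters returned by the optimizer
result = rng.normal(size=hidden_layer_size * (input_layer_size + 1)
                         + num_labels * (hidden_layer_size + 1))

Theta1 = result[:hidden_layer_size * (input_layer_size + 1)].reshape(hidden_layer_size, input_layer_size + 1)
Theta2 = result[hidden_layer_size * (input_layer_size + 1):].reshape(num_labels, hidden_layer_size + 1)

X = rng.normal(size=(3, input_layer_size))                   # three fake examples
a2 = sigmoid(np.hstack((np.ones((3, 1)), X)).dot(Theta1.T))
h = sigmoid(np.hstack((np.ones((3, 1)), a2)).dot(Theta2.T))
p = np.argmax(h, axis=1)                                     # predicted class per example
print(p)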