-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathdense_layer.py
68 lines (51 loc) · 1.98 KB
/
dense_layer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
import initialization
class DenseLayer:
    """Fully connected (affine) layer with hand-written backpropagation.

    The layer computes ``np.dot(X, W) + b`` in :meth:`forward` and caches
    the input so that :meth:`backward` can derive the gradients of the loss
    with respect to ``W``, ``b`` and the input itself.

    Shape conventions used throughout:
        X / dLoss/dX : [batch_size, fan_in]
        W / dW       : [fan_in, fan_out]
        b / db       : [fan_out]
        output / prev_grads : [batch_size, fan_out]
    """

    def __init__(self, name, shape, l2reg=0, init_method='glorot_uniform'):
        """Create the layer's parameters.

        Args:
            name: Prefix used for the keys of ``variables`` / ``grads2var``.
            shape: ``(fan_in, fan_out)``, handed to the weight initializer;
                ``shape[1]`` is handed to the bias initializer.
            l2reg: Coefficient of the L2 penalty applied to ``W`` only.
            init_method: Initializer name resolved through
                ``initialization.get_global_init``.
        """
        self._name = name
        self._l2reg = l2reg

        self._W = initialization.get_global_init(init_method)(shape)
        # The bias always uses the 'zero' initializer, whatever init_method is.
        self._b = initialization.get_global_init('zero')(shape[1])

        # Gradients are filled in by backward(); None until then.
        self._dW = None
        self._db = None

        # Input of the most recent forward() call, needed by backward().
        self._last_input = None

    def forward(self, X):
        """Return ``np.dot(X, W) + b`` and remember ``X`` for backward().

        Args:
            X: Input batch, [batch_size, fan_in].

        Returns:
            Layer output, [batch_size, fan_out].
        """
        self._last_input = X
        return np.dot(self._last_input, self._W) + self._b

    def backward(self, prev_grads):
        """Compute parameter gradients and return the gradient w.r.t. the input.

        Stores ``dW`` (including the L2 term) and ``db`` on the instance so
        they can be read through ``grads2var``.

        Args:
            prev_grads: dLoss/dOutput, [batch_size, fan_out].

        Returns:
            dLoss/dX, [batch_size, fan_in].

        Raises:
            RuntimeError: If forward() has not been called yet.
            ValueError: If ``prev_grads`` is not 2-D or its second dimension
                differs from the layer's fan_out.
        """
        if self._last_input is None:
            raise RuntimeError(
                "backward() called before forward() on layer {!r}".format(
                    self._name))

        # Explicit check instead of `assert`, so the validation is still
        # performed when Python runs with -O.
        fan_out = self._W.shape[1]
        if prev_grads.ndim != 2 or prev_grads.shape[1] != fan_out:
            raise ValueError(
                "prev_grads must have shape [batch_size, {}], got {}".format(
                    fan_out, prev_grads.shape))

        # last_input.T: [fan_in, batch_size] x prev_grads: [batch_size, fan_out]
        # -> dW: [fan_in, fan_out], the same shape as W.
        self._dW = np.dot(self._last_input.T, prev_grads)
        # Add the derivative of the L2 loss with respect to W.
        self._dW += self._l2reg * self._W

        # Think of b as a special W whose fan_in is 1 and apply the same
        # formula: a row of batch_size ones ([1, batch_size]) times
        # prev_grads ([batch_size, fan_out]) is the sum of each column.
        self._db = np.sum(prev_grads, axis=0)

        # prev_grads: [batch_size, fan_out] x W.T: [fan_out, fan_in]
        # -> dLoss/dX: [batch_size, fan_in].
        return np.dot(prev_grads, self._W.T)

    @property
    def l2reg_loss(self):
        """L2 penalty ``0.5 * l2reg * sum(W ** 2)``; the bias is not penalized."""
        return 0.5 * self._l2reg * np.sum(self._W ** 2)

    @property
    def shape(self):
        """Shape of the weight matrix, ``(fan_in, fan_out)``."""
        return self._W.shape

    @property
    def output_dim(self):
        """Number of output units (``fan_out``)."""
        return self._W.shape[1]

    @property
    def variables(self):
        """Dict mapping ``"<name>_W"`` / ``"<name>_b"`` to the parameters."""
        return {"{}_W".format(self._name): self._W,
                "{}_b".format(self._name): self._b}

    @property
    def grads2var(self):
        """Dict mapping ``"<name>_W"`` / ``"<name>_b"`` to their gradients.

        Values are None until backward() has been called.
        """
        return {"{}_W".format(self._name): self._dW,
                "{}_b".format(self._name): self._db}