@@ -706,7 +706,7 @@ def forward_propagation(
        """
        :param x_values_seq: a n-length iterable of input points
        :return: list of length (L+2) where the first (L+1) values
-        each represent the 2-D input arrays (of size n x |I_l|),
+        each represent the 2-D input arrays (of size n x |i_l|),
        for each of the (L+1) layers (L of which are hidden layers),
        and the last value represents the output of the DNN (as a
        1-D array of length n)
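The docstring above pins down a shape contract: forward_propagation returns L+2 arrays, the first L+1 being the 2-D inputs to each layer (each of size n x |i_l|) and the last being the 1-D output of length n. As a reading aid, here is a minimal standalone sketch that satisfies that contract. The function name forward_propagation_sketch, the sigmoid hidden activation, and the explicit bias-column handling are illustrative assumptions, not the repository's actual implementation.

import numpy as np
from typing import List, Sequence


def forward_propagation_sketch(
    x_values: np.ndarray,              # 2-D array of shape (n, num_features)
    weights: Sequence[np.ndarray],     # L+1 arrays, each of shape |o_l| x |i_l|
    bias: bool = True
) -> List[np.ndarray]:
    # Returns a list of length L+2: the L+1 layer inputs, then the 1-D output.
    def with_bias(arr: np.ndarray) -> np.ndarray:
        # prepend a column of ones, so i_l = o_{l-1} + 1 when bias is used
        return np.column_stack((np.ones(arr.shape[0]), arr)) if bias else arr

    ret: List[np.ndarray] = [with_bias(x_values)]     # input to layer 0
    for w in weights[:-1]:
        s = np.dot(ret[-1], w.T)                      # linear predictor s_l, n x |o_l|
        o = 1.0 / (1.0 + np.exp(-s))                  # assumed sigmoid activation
        ret.append(with_bias(o))                      # becomes the input to layer l+1
    ret.append(np.dot(ret[-1], weights[-1].T)[:, 0])  # 1-D output of length n
    return ret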
@@ -743,7 +743,7 @@ def backward_propagation(
        :param obj_deriv_out: represents the derivative of the objective
        function with respect to the linear predictor of the final layer.

-        :return: list (of length L+1) of |O_l| x |I_l| 2-D arrays,
+        :return: list (of length L+1) of |o_l| x |i_l| 2-D arrays,
        i.e., same as the type of self.weights.weights
        This function computes the gradient (with respect to weights) of
        the objective where the output layer activation function
@@ -753,25 +753,25 @@ def backward_propagation(
        back_prop: List[np.ndarray] = [np.dot(deriv, fwd_prop[-1]) /
                                       deriv.shape[1]]
        # L is the number of hidden layers, n is the number of points
-        # layer l deriv represents dObj/dS_l where S_l = I_l . weights_l
-        # (S_l is the result of applying layer l without the activation func)
+        # layer l deriv represents dObj/ds_l where s_l = i_l . weights_l
+        # (s_l is the result of applying layer l without the activation func)
        for i in reversed(range(len(self.weights) - 1)):
-            # deriv_l is a 2-D array of dimension |O_l| x n
+            # deriv_l is a 2-D array of dimension |o_l| x n
            # The recursive formulation of deriv is as follows:
-            # deriv_{l-1} = (weights_l^T inner deriv_l) Hadamard g'(S_{l-1}),
-            # which is ((|I_l| x |O_l|) inner (|O_l| x n)) Hadamard
-            # (|I_l| x n), which is (|I_l| x n) = (|O_{l-1}| x n)
-            # Note: g'(S_{l-1}) is expressed as hidden layer activation
-            # derivative as a function of O_{l-1} (=I_l).
+            # deriv_{l-1} = (weights_l^T inner deriv_l) Hadamard g'(s_{l-1}),
+            # which is ((|i_l| x |o_l|) inner (|o_l| x n)) Hadamard
+            # (|i_l| x n), which is (|i_l| x n) = (|o_{l-1}| x n)
+            # Note: g'(s_{l-1}) is expressed as hidden layer activation
+            # derivative as a function of o_{l-1} (=i_l).
            deriv = np.dot(self.weights[i + 1].weights.T, deriv) * \
                self.dnn_spec.hidden_activation_deriv(fwd_prop[i + 1].T)
-            # If self.dnn_spec.bias is True, then I_l = O_{l-1} + 1, in which
+            # If self.dnn_spec.bias is True, then i_l = o_{l-1} + 1, in which
            # case the first row of the calculated deriv is removed to yield
-            # a 2-D array of dimension |O_{l-1}| x n.
+            # a 2-D array of dimension |o_{l-1}| x n.
            if self.dnn_spec.bias:
                deriv = deriv[1:]
            # layer l gradient is deriv_l inner fwd_prop[l], which is
-            # of dimension (|O_l| x n) inner (n x |I_l|) = |O_l| x |I_l|
+            # of dimension (|o_l| x n) inner (n x |i_l|) = |o_l| x |i_l|
            back_prop.append(np.dot(deriv, fwd_prop[i]) / deriv.shape[1])
        return back_prop[::-1]
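To make the commented recursion above concrete, here is a matching sketch of the backward pass, paired with the forward sketch earlier. It follows the commented formula deriv_{l-1} = (weights_l^T inner deriv_l) Hadamard g'(s_{l-1}), drops the bias row when bias columns are used, and forms each gradient as deriv_l inner fwd_prop[l] divided by n. The function names, the sigmoid derivative g'(s) = o * (1 - o) written as a function of the layer output o, and the toy sizes in the usage snippet are illustrative assumptions, not the repository's DNNApprox code.

import numpy as np
from typing import List, Sequence


def backward_propagation_sketch(
    fwd_prop: Sequence[np.ndarray],   # the L+1 layer inputs from the forward pass
    weights: Sequence[np.ndarray],    # L+1 arrays, each of shape |o_l| x |i_l|
    obj_deriv_out: np.ndarray,        # dObj/ds of the final linear predictor, length n
    bias: bool = True
) -> List[np.ndarray]:
    # Returns L+1 gradient arrays, each shaped like the corresponding weights array.
    deriv = obj_deriv_out.reshape(1, -1)                        # 1 x n
    back_prop = [np.dot(deriv, fwd_prop[-1]) / deriv.shape[1]]  # output-layer gradient
    for i in reversed(range(len(weights) - 1)):
        o = fwd_prop[i + 1].T                                   # |i_{l+1}| x n
        # assumed sigmoid hidden activation, so g'(s) = o * (1 - o)
        deriv = np.dot(weights[i + 1].T, deriv) * (o * (1.0 - o))
        if bias:
            deriv = deriv[1:]                                   # drop the bias row
        back_prop.append(np.dot(deriv, fwd_prop[i]) / deriv.shape[1])
    return back_prop[::-1]


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    n, d = 5, 3
    x = rng.normal(size=(n, d))
    y = rng.normal(size=n)
    # two hidden layers of width 4, with bias columns (hypothetical sizes)
    weights = [rng.normal(size=(4, d + 1)), rng.normal(size=(4, 5)),
               rng.normal(size=(1, 5))]
    fwd = forward_propagation_sketch(x, weights, bias=True)
    # assume an objective whose derivative w.r.t. the final linear predictor
    # is (prediction - y), e.g. squared error up to a constant factor
    grads = backward_propagation_sketch(fwd, weights, fwd[-1] - y, bias=True)
    print([g.shape for g in grads])   # [(4, 4), (4, 5), (1, 5)]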