Skip to content

Commit

Permalink
Merge pull request apache#1580 from linmx0130/master
Browse files Browse the repository at this point in the history
add AdaGrad optimizer in python, a revised version
  • Loading branch information
piiswrong committed Mar 9, 2016
2 parents f91b2e5 + e20de1a commit 32375b8
Showing 1 changed file with 36 additions and 0 deletions.
36 changes: 36 additions & 0 deletions python/mxnet/optimizer.py
Original file line number Diff line number Diff line change
@@ -546,6 +546,42 @@ def update(self, index, weight, grad, state):
mean[:] = mean_t
variance[:] = variance_t

@register
class AdaGrad(Optimizer):
    """AdaGrad optimizer (Duchi et al., 2011).

    The update rule follows Eq. (5) of Matthew D. Zeiler, 2012
    (http://arxiv.org/pdf/1212.5701v1.pdf): every gradient element is
    divided by the square root of the running sum of its own squared
    values. AdaGrad will help the network to converge faster in some cases.

    Parameters
    ----------
    learning_rate : float, optional
        Step size. Default value is 0.05.
    wd : float, optional
        L2 regularization coefficient applied to all the weights.
        Default value is 0.
    rescale_grad : float, optional
        Factor the gradient is multiplied by before it is used.
        Default value is 1.
    eps : float, optional
        Small constant added to the accumulated squared gradient before
        the square root is taken, to keep the update numerically stable.
        Default value is 1e-7.
    arg_names : optional
        Forwarded unchanged to the ``Optimizer`` base constructor.
    """

    def __init__(self, learning_rate=0.05, wd=0., rescale_grad=1, eps=1e-7, arg_names=None):
        # NOTE(review): ``self.wd`` is read in ``update`` but never assigned
        # here -- presumably the base constructor stores it; confirm.
        super(AdaGrad, self).__init__(rescale_grad, arg_names, wd)
        self.rescale_grad = rescale_grad
        self.float_stable_eps = eps
        self.lr = learning_rate

    def create_state(self, index, weight):
        """Return a zero array with the shape and context of ``weight``.

        The array is the per-weight history of squared gradients that
        ``update`` accumulates into. ``index`` is not used.
        """
        squared_grad_history = zeros(weight.shape, weight.context)
        return squared_grad_history

    def update(self, index, weight, grad, state):
        """Apply one AdaGrad step to ``weight`` in place.

        ``state`` is the history array produced by ``create_state``; it is
        modified in place as well. ``index`` is not used.
        """
        assert isinstance(weight, NDArray)
        assert isinstance(grad, NDArray)

        scaled_grad = grad * self.rescale_grad

        # Accumulate through a slice assignment so the caller's state array
        # itself is updated rather than a new array being bound locally.
        state[:] += scaled_grad * scaled_grad

        adaptive_step = scaled_grad / sqrt(state + self.float_stable_eps)
        weight_decay = self.wd * weight
        weight[:] += -self.lr * (adaptive_step + weight_decay)

@register
class RMSProp(Optimizer):
"""RMSProp optimizer of Tieleman & Hinton, 2012,

0 comments on commit 32375b8

Please sign in to comment.