change the way we use learn_rate
liuliu committed Feb 25, 2014
1 parent 3156ec0 commit 2742097
Showing 3 changed files with 16 additions and 13 deletions.
4 changes: 2 additions & 2 deletions bin/image-net.c
@@ -391,10 +391,10 @@ int main(int argc, char** argv)
     for (i = 0; i < 13; i++)
     {
         layer_params[i].w.decay = 0.0005;
-        layer_params[i].w.learn_rate = 0.0000004;
+        layer_params[i].w.learn_rate = 0.000001;
         layer_params[i].w.momentum = 0.9;
         layer_params[i].bias.decay = 0;
-        layer_params[i].bias.learn_rate = 0.0000004;
+        layer_params[i].bias.learn_rate = 0.000001;
         layer_params[i].bias.momentum = 0.9;
     }
     layer_params[10].dor = 0.5;
3 changes: 2 additions & 1 deletion lib/ccv_basic.c
@@ -340,7 +340,8 @@ void ccv_flip(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int btype, int type
         btype = CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type);
         *b = db = ccv_dense_matrix_renew(*b, a->rows, a->cols, btype, btype, sig);
         ccv_object_return_if_cached(, db);
-        memcpy(db->data.u8, a->data.u8, a->rows * a->step);
+        if (a->data.u8 != db->data.u8)
+            memcpy(db->data.u8, a->data.u8, a->rows * a->step);
     }
     if (type & CCV_FLIP_Y)
         _ccv_flip_y_self(db);
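The added check presumably guards the in-place case, where the destination matrix ends up sharing its data buffer with the source and the copy would be a buffer copied onto itself. A standalone sketch of the same guard, with hypothetical names (not ccv code):

#include <string.h>

/* Copy rows * step bytes from src to dst, skipping the copy when both point at
 * the same buffer: memcpy on identical regions does no useful work and is
 * formally invalid for overlapping memory. */
static void copy_pixels(unsigned char* dst, const unsigned char* src, int rows, int step)
{
    if (dst != src)
        memcpy(dst, src, (size_t)rows * (size_t)step);
}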
22 changes: 12 additions & 10 deletions lib/cuda/cwc_convnet.cu
@@ -1556,7 +1556,7 @@ static void _cwc_convnet_tests_return(int batch, int count, float* a, int* c, co
 template <int momentum_read>
 __global__ static void _cwc_kern_net_sgd(float* a, float* grad, float* momentum,
     const int count,
-    const float learn_rate, const float momentum_rate, const float decay)
+    const float learn_rate, const float momentum_rate, const float decay_and_learn)
 {
     if (blockIdx.x * blockDim.x + threadIdx.x < count)
     {
@@ -1565,7 +1565,7 @@ __global__ static void _cwc_kern_net_sgd(float* a, float* grad, float* momentum,
         momentum += blockIdx.x * blockDim.x;
         const int thidx = threadIdx.x;
         float old_a = a[thidx];
-        float velocity = (momentum_read ? momentum_rate * momentum[thidx] : 0) - decay * learn_rate * old_a + learn_rate * grad[thidx];
+        float velocity = (momentum_read ? momentum_rate * momentum[thidx] : 0) - decay_and_learn * old_a + learn_rate * grad[thidx];
         a[thidx] = velocity + old_a;
         momentum[thidx] = velocity;
     }
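With this change the kernel receives the learning rate already divided by the mini-batch size, and the weight-decay coefficient pre-multiplied by the learning rate, instead of computing decay * learn_rate per thread. A minimal CPU sketch of the per-weight update as the kernel now performs it, assuming grad[] holds the gradient accumulated over the mini-batch (that accumulation is outside this diff) and momentum_read is 1:

/* Sketch only: mirrors the element-wise math of _cwc_kern_net_sgd after this change. */
static void sgd_step(float* w, float* velocity, const float* grad, int count,
                     float learn_rate, float momentum_rate, float decay, int batch)
{
    const float grad_rate = learn_rate / batch;   /* what the kernel now receives as learn_rate */
    const float decay_rate = decay * learn_rate;  /* what the kernel now receives as decay_and_learn */
    int i;
    for (i = 0; i < count; i++)
    {
        float v = momentum_rate * velocity[i] - decay_rate * w[i] + grad_rate * grad[i];
        w[i] += v;
        velocity[i] = v;
    }
}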
@@ -1595,23 +1595,23 @@ static void _cwc_convnet_net_sgd(ccv_convnet_t* convnet, int momentum_read, int
                     <1>
                     <<<num_blocks_for_coeff, threads_per_block, 0, context->device.stream>>>
                     (layer->w, configuration->w, momentum->w, layer->wnum,
-                    layer_params[i].w.learn_rate, layer_params[i].w.momentum, layer_params[i].w.decay);
+                    layer_params[i].w.learn_rate / batch, layer_params[i].w.momentum, layer_params[i].w.decay * layer_params[i].w.learn_rate);
                     _cwc_kern_net_sgd
                     <1>
                     <<<num_blocks_for_bias, threads_per_block, 0, context->device.stream>>>
                     (layer->bias, configuration->bias, momentum->bias, layer->net.convolutional.count,
-                    layer_params[i].bias.learn_rate, layer_params[i].bias.momentum, layer_params[i].bias.decay);
+                    layer_params[i].bias.learn_rate / batch, layer_params[i].bias.momentum, layer_params[i].bias.decay * layer_params[i].bias.learn_rate);
                 } else {
                     _cwc_kern_net_sgd
                     <0>
                     <<<num_blocks_for_coeff, threads_per_block, 0, context->device.stream>>>
                     (layer->w, configuration->w, momentum->w, layer->wnum,
-                    layer_params[i].w.learn_rate, layer_params[i].w.momentum, layer_params[i].w.decay);
+                    layer_params[i].w.learn_rate / batch, layer_params[i].w.momentum, layer_params[i].w.decay * layer_params[i].w.learn_rate);
                     _cwc_kern_net_sgd
                     <0>
                     <<<num_blocks_for_bias, threads_per_block, 0, context->device.stream>>>
                     (layer->bias, configuration->bias, momentum->bias, layer->net.convolutional.count,
-                    layer_params[i].bias.learn_rate, layer_params[i].bias.momentum, layer_params[i].bias.decay);
+                    layer_params[i].bias.learn_rate / batch, layer_params[i].bias.momentum, layer_params[i].bias.decay * layer_params[i].bias.learn_rate);
                 }
                 break;
             case CCV_CONVNET_FULL_CONNECT:
@@ -1624,25 +1624,26 @@
                     <1>
                     <<<num_blocks_for_coeff, threads_per_block, 0, context->device.stream>>>
                     (layer->w, configuration->w, momentum->w, layer->wnum,
-                    layer_params[i].w.learn_rate, layer_params[i].w.momentum, layer_params[i].w.decay);
+                    layer_params[i].w.learn_rate / batch, layer_params[i].w.momentum, layer_params[i].w.decay * layer_params[i].w.learn_rate);
                     _cwc_kern_net_sgd
                     <1>
                     <<<num_blocks_for_bias, threads_per_block, 0, context->device.stream>>>
                     (layer->bias, configuration->bias, momentum->bias, layer->net.full_connect.count,
-                    layer_params[i].bias.learn_rate, layer_params[i].bias.momentum, layer_params[i].bias.decay);
+                    layer_params[i].bias.learn_rate / batch, layer_params[i].bias.momentum, layer_params[i].bias.decay * layer_params[i].bias.learn_rate);
                 } else {
                     _cwc_kern_net_sgd
                     <0>
                     <<<num_blocks_for_coeff, threads_per_block, 0, context->device.stream>>>
                     (layer->w, configuration->w, momentum->w, layer->wnum,
-                    layer_params[i].w.learn_rate, layer_params[i].w.momentum, layer_params[i].w.decay);
+                    layer_params[i].w.learn_rate / batch, layer_params[i].w.momentum, layer_params[i].w.decay * layer_params[i].w.learn_rate);
                     _cwc_kern_net_sgd
                     <0>
                     <<<num_blocks_for_bias, threads_per_block, 0, context->device.stream>>>
                     (layer->bias, configuration->bias, momentum->bias, layer->net.full_connect.count,
-                    layer_params[i].bias.learn_rate, layer_params[i].bias.momentum, layer_params[i].bias.decay);
+                    layer_params[i].bias.learn_rate / batch, layer_params[i].bias.momentum, layer_params[i].bias.decay * layer_params[i].bias.learn_rate);
                 }
                 break;
+            case CCV_CONVNET_LOCAL_RESPONSE_NORM:
             case CCV_CONVNET_MAX_POOL:
             case CCV_CONVNET_AVERAGE_POOL:
                 break;
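Each launch site now repeats the same two expressions. As a sketch only, with a hypothetical helper that is not part of this commit, the scaling could be computed once per parameter set:

/* Hypothetical helper: derive the two pre-scaled rates that the updated
 * _cwc_kern_net_sgd launches expect from a layer's configured values. */
static void cwc_sgd_rates(float learn_rate, float decay, int batch,
                          float* grad_rate, float* decay_and_learn)
{
    *grad_rate = learn_rate / batch;       /* step applied to the summed mini-batch gradient */
    *decay_and_learn = decay * learn_rate; /* weight decay keeps the full learn_rate */
}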
@@ -1783,6 +1784,7 @@ static void _cwc_convnet_mean_formation(ccv_array_t* categorizeds, ccv_size_t di
         for (i = 0; i < dim.height * dim.width * channels; i++)
             db->data.f32[i] = p * c->data.f64[i];
     }
+    ccv_matrix_free(c);
     printf("\n");
 }
 
