Skip to content

Commit

Permalink
added a new post and updated cifar-10 to be runnable
Browse files Browse the repository at this point in the history
  • Loading branch information
liuliu committed Mar 27, 2014
1 parent 3c247ce commit 38db768
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 74 deletions.
109 changes: 62 additions & 47 deletions bin/cifar-10.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ int main(int argc, char** argv)
.rows = 31,
.cols = 31,
.channels = 3,
.partition = 1,
},
},
.output = {
Expand All @@ -24,41 +25,44 @@ int main(int argc, char** argv)
.border = 2,
.strides = 1,
.count = 32,
.partition = 1,
},
},
},
{
.type = CCV_CONVNET_MAX_POOL,
.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
.input = {
.matrix = {
.rows = 31,
.cols = 31,
.channels = 32,
.partition = 1,
},
},
.output = {
.pool = {
.rnorm = {
.size = 3,
.strides = 2,
.border = 0,
.kappa = 1,
.alpha = 1e-4,
.beta = 0.75,
},
},
},
{
.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
.type = CCV_CONVNET_MAX_POOL,
.input = {
.matrix = {
.rows = 15,
.cols = 15,
.rows = 31,
.cols = 31,
.channels = 32,
.partition = 1,
},
},
.output = {
.rnorm = {
.pool = {
.size = 3,
.kappa = 1,
.alpha = 0.0001,
.beta = 0.75,
.strides = 2,
.border = 0,
},
},
},
Expand All @@ -71,6 +75,7 @@ int main(int argc, char** argv)
.rows = 15,
.cols = 15,
.channels = 32,
.partition = 1,
},
},
.output = {
Expand All @@ -81,41 +86,44 @@ int main(int argc, char** argv)
.border = 2,
.strides = 1,
.count = 32,
.partition = 1,
},
},
},
{
.type = CCV_CONVNET_AVERAGE_POOL,
.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
.input = {
.matrix = {
.rows = 15,
.cols = 15,
.channels = 32,
.partition = 1,
},
},
.output = {
.pool = {
.rnorm = {
.size = 3,
.strides = 2,
.border = 0,
.kappa = 1,
.alpha = 1e-4,
.beta = 0.75,
},
},
},
{
.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
.type = CCV_CONVNET_AVERAGE_POOL,
.input = {
.matrix = {
.rows = 7,
.cols = 7,
.rows = 15,
.cols = 15,
.channels = 32,
.partition = 1,
},
},
.output = {
.rnorm = {
.pool = {
.size = 3,
.kappa = 1,
.alpha = 0.0001,
.beta = 0.75,
.strides = 2,
.border = 0,
},
},
},
Expand All @@ -128,6 +136,7 @@ int main(int argc, char** argv)
.rows = 7,
.cols = 7,
.channels = 32,
.partition = 1,
},
},
.output = {
Expand All @@ -138,6 +147,7 @@ int main(int argc, char** argv)
.border = 2,
.strides = 1,
.count = 64,
.partition = 1,
},
},
},
Expand All @@ -148,6 +158,7 @@ int main(int argc, char** argv)
.rows = 7,
.cols = 7,
.channels = 64,
.partition = 1,
},
},
.output = {
Expand All @@ -167,19 +178,21 @@ int main(int argc, char** argv)
.rows = 3,
.cols = 3,
.channels = 64,
.partition = 1,
},
.node = {
.count = 3 * 3 * 64,
},
},
.output = {
.full_connect = {
.relu = 0,
.count = 10,
},
},
},
};
ccv_convnet_t* convnet = ccv_convnet_new(1, ccv_size(31, 31), params, sizeof(params) / sizeof(ccv_convnet_layer_param_t));
ccv_convnet_t* convnet = ccv_convnet_new(1, ccv_size(32, 32), params, sizeof(params) / sizeof(ccv_convnet_layer_param_t));
assert(ccv_convnet_verify(convnet, 10) == 0);
assert(argc == 5);
int num1 = atoi(argv[2]);
Expand All @@ -195,18 +208,18 @@ int main(int argc, char** argv)
{
fread(bytes, 32 * 32 + 1, 1, r1);
int c = bytes[0];
ccv_dense_matrix_t* a = ccv_dense_matrix_new(31, 31, CCV_32F | CCV_C3, 0, 0);
for (i = 0; i < 31; i++)
for (j = 0; j < 31; j++)
a->data.f32[(j + i * 31) * 3] = bytes[j + i * 32 + 1] / 255.0 * 2 - 1;
ccv_dense_matrix_t* a = ccv_dense_matrix_new(32, 32, CCV_32F | CCV_C3, 0, 0);
for (i = 0; i < 32; i++)
for (j = 0; j < 32; j++)
a->data.f32[(j + i * 32) * 3] = bytes[j + i * 32 + 1];
fread(bytes, 32 * 32, 1, r1);
for (i = 0; i < 31; i++)
for (j = 0; j < 31; j++)
a->data.f32[(j + i * 31) * 3 + 1] = bytes[j + i * 32] / 255.0 * 2 - 1;
for (i = 0; i < 32; i++)
for (j = 0; j < 32; j++)
a->data.f32[(j + i * 32) * 3 + 1] = bytes[j + i * 32];
fread(bytes, 32 * 32, 1, r1);
for (i = 0; i < 31; i++)
for (j = 0; j < 31; j++)
a->data.f32[(j + i * 31) * 3 + 2] = bytes[j + i * 32] / 255.0 * 2 - 1;
for (i = 0; i < 32; i++)
for (j = 0; j < 32; j++)
a->data.f32[(j + i * 32) * 3 + 2] = bytes[j + i * 32];
ccv_categorized_t categorized = ccv_categorized(c, a, 0);
ccv_array_push(categorizeds, &categorized);
}
Expand All @@ -215,47 +228,47 @@ int main(int argc, char** argv)
{
fread(bytes, 32 * 32 + 1, 1, r2);
int c = bytes[0];
ccv_dense_matrix_t* a = ccv_dense_matrix_new(31, 31, CCV_32F | CCV_C3, 0, 0);
for (i = 0; i < 31; i++)
for (j = 0; j < 31; j++)
a->data.f32[(j + i * 31) * 3] = bytes[j + i * 32 + 1] / 255.0 * 2 - 1;
ccv_dense_matrix_t* a = ccv_dense_matrix_new(32, 32, CCV_32F | CCV_C3, 0, 0);
for (i = 0; i < 32; i++)
for (j = 0; j < 32; j++)
a->data.f32[(j + i * 32) * 3] = bytes[j + i * 32 + 1];
fread(bytes, 32 * 32, 1, r2);
for (i = 0; i < 31; i++)
for (j = 0; j < 31; j++)
a->data.f32[(j + i * 31) * 3 + 1] = bytes[j + i * 32] / 255.0 * 2 - 1;
for (i = 0; i < 32; i++)
for (j = 0; j < 32; j++)
a->data.f32[(j + i * 32) * 3 + 1] = bytes[j + i * 32];
fread(bytes, 32 * 32, 1, r2);
for (i = 0; i < 31; i++)
for (j = 0; j < 31; j++)
a->data.f32[(j + i * 31) * 3 + 2] = bytes[j + i * 32] / 255.0 * 2 - 1;
for (i = 0; i < 32; i++)
for (j = 0; j < 32; j++)
a->data.f32[(j + i * 32) * 3 + 2] = bytes[j + i * 32];
ccv_categorized_t categorized = ccv_categorized(c, a, 0);
ccv_array_push(tests, &categorized);
}
ccv_convnet_layer_train_param_t layer_params[9];
memset(layer_params, 0, sizeof(layer_params));

layer_params[0].w.decay = 0.005;
layer_params[0].w.learn_rate = 0.0005;
layer_params[0].w.learn_rate = 0.001;
layer_params[0].w.momentum = 0.9;
layer_params[0].bias.decay = 0;
layer_params[0].bias.learn_rate = 0.001;
layer_params[0].bias.momentum = 0.9;

layer_params[3].w.decay = 0.005;
layer_params[3].w.learn_rate = 0.0005;
layer_params[3].w.learn_rate = 0.001;
layer_params[3].w.momentum = 0.9;
layer_params[3].bias.decay = 0;
layer_params[3].bias.learn_rate = 0.001;
layer_params[3].bias.momentum = 0.9;

layer_params[6].w.decay = 0.005;
layer_params[6].w.learn_rate = 0.0005;
layer_params[6].w.learn_rate = 0.001;
layer_params[6].w.momentum = 0.9;
layer_params[6].bias.decay = 0;
layer_params[6].bias.learn_rate = 0.001;
layer_params[6].bias.momentum = 0.9;

layer_params[8].w.decay = 0.01;
layer_params[8].w.learn_rate = 0.0005;
layer_params[8].w.learn_rate = 0.001;
layer_params[8].w.momentum = 0.9;
layer_params[8].bias.decay = 0;
layer_params[8].bias.learn_rate = 0.001;
Expand All @@ -265,6 +278,8 @@ int main(int argc, char** argv)
.max_epoch = 999,
.mini_batch = 128,
.iterations = 500,
.symmetric = 1,
.color_gain = 0,
.layer_params = layer_params,
};
ccv_convnet_supervised_train(convnet, categorizeds, tests, "cifar-10.sqlite3", params);
Expand Down
4 changes: 2 additions & 2 deletions doc/convnet.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ than Alex's result, the top-5 missing rate is 16.26%, 0.74% better than Alex's.
image-net.sqlite3 (the one included in ./samples/), the top-1 missing rate is 36.83% and the top-5
missing rate is 16.25%.

For 32-bit float point image-net.sqlite3 on CPU, the top-1 missing rate is 37.34%, and the top-5
missing rate is 16.62%.
For 32-bit float point image-net.sqlite3 on CPU, the top-1 missing rate is 37.31%, and the top-5
missing rate is 16.55%.

You can download the 32-bit float point one with ./samples/download-image-net.sh

Expand Down
20 changes: 2 additions & 18 deletions serve/convnet.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,26 +184,10 @@ int uri_convnet_classify(const void* context, const void* parsed, ebb_buf* buf)
free(parser);
return -1;
}
ccv_dense_matrix_t* norm = 0;
ccv_convnet_t* convnet = parser->convnet_and_words->convnet;
if (image->rows > convnet->input.height && image->cols > parser->convnet_and_words->convnet->input.width)
ccv_resample(image, &norm, 0, ccv_max(convnet->input.height, (int)(image->rows * (float)convnet->input.height / image->cols + 0.5)), ccv_max(convnet->input.width, (int)(image->cols * (float)convnet->input.width / image->rows + 0.5)), CCV_INTER_AREA);
else if (image->rows < convnet->input.height || image->cols < convnet->input.width)
ccv_resample(image, &norm, 0, ccv_max(convnet->input.height, (int)(image->rows * (float)convnet->input.height / image->cols + 0.5)), ccv_max(convnet->input.width, (int)(image->cols * (float)convnet->input.width / image->rows + 0.5)), CCV_INTER_CUBIC);
else
norm = image;
if (norm != image)
ccv_matrix_free(image);
ccv_dense_matrix_t* input = 0;
if (norm->cols != convnet->input.width || norm->rows != convnet->input.height)
{
int x = (norm->cols - convnet->input.width + 1) / 2;
int y = (norm->rows - convnet->input.height + 1) / 2;
assert(x == 0 || y == 0);
ccv_slice(norm, (ccv_matrix_t**)&input, CCV_32F, y, x, convnet->input.height, convnet->input.width);
} else
ccv_shift(norm, (ccv_matrix_t**)&input, CCV_32F, 0, 0); // converting to 32f
ccv_matrix_free(norm);
ccv_convnet_input_formation(convnet, image, &input);
ccv_matrix_free(image);
ccv_array_t* rank = 0;
ccv_convnet_classify(convnet, &input, 1, &rank, parser->top, 1);
// print out
Expand Down
6 changes: 3 additions & 3 deletions site/_posts/0000-01-01-doc-convnet.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ than Alex's result, the top-5 missing rate is 16.26%, 0.74% better than Alex's.
image-net.sqlite3 (the one included in ./samples/), the top-1 missing rate is 36.83% and the top-5
missing rate is 16.25%.

For 32-bit float point image-net.sqlite3 on CPU, the top-1 missing rate is 37.34%, and the top-5
missing rate is 16.62%.
For 32-bit float point image-net.sqlite3 on CPU, the top-1 missing rate is 37.31%, and the top-5
missing rate is 16.55%.

You can download the 32-bit float point one with ./samples/download-image-net.sh

Expand Down Expand Up @@ -105,7 +105,7 @@ within 6 days on two GeForce 580, which suggests my time is within line of these
As a preliminary implementation, I didn't spend enough time to optimize these operations in ccv if
any at all. For example, [cuda-convnet](http://code.google.com/p/cuda-convnet/) implements its
functionalities in about 10,000 lines of code, Caffe implements with 14,000 lines of code, as of
this release, ccv implements with about 3,700 lines of code. For the future, the low-hanging
this release, ccv implements with about 4,300 lines of code. For the future, the low-hanging
optimization opportunities include using SIMD instruction, doing FFT in densely convolved layers
etc.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
---
date: '2014-03-27 07:35:00'
layout: post
slug: ccv-0.6-open-sources-near-state-of-the-art-image-classifier-under-creative-commons
status: publish
title: ccv 0.6 open sources near state-of-the-art image classifier under Creative Commons
categories:
- post
---

In previous posts, I mentioned that even-numbered releases would be bugfix releases. However, 0.6 is a bit different.

![now-go-back-and-play-forza](/photo/2014-03-27-dex.png)

For the past year and a half, deep learning, particularly deep convolutional neural network based image classification, has made waves in the vision community. For a library aiming at providing state-of-the-art implementations, it would be frustrating not to have a competent image classifier implemented more than a year after the ground-breaking work was published. In the meantime, a few open source libraries have provided complete ([Caffe](http://caffe.berkeleyvision.org/)) / incomplete ([OverFeat](http://cilvr.nyu.edu/doku.php?id=software:overfeat:start), [cuda-convnet](http://code.google.com/p/cuda-convnet/)) implementations of the said image classifier. However, all of them focus on research-related activities (see their licenses: [1](http://caffe.berkeleyvision.org/getting_pretrained_models.html), [2](https://github.com/sermanet/OverFeat/blob/master/LICENSE)). Thus, for the past 5 months, I've been working on an image classifier in ccv based on a deep convolutional neural network.

This version of ccv distributes an image classifier that is trained on the ILSVRC 2010 data set of 1000 classes, with a top-1 missing rate of 36.83% and a top-5 missing rate of 16.25%, and is thus close to the state-of-the-art image classifier (Clarifai in ILSVRC 2013 gets a top-5 missing rate of 11.19%: <http://www.image-net.org/challenges/LSVRC/2013/results.php>; the 16.25% top-5 missing rate is reproduced with the ILSVRC 2010 test data set, which is known to be less challenging). [See more about this image classifier](/doc/doc-convnet).

The license for the data models (the said image classifier, and the pedestrian detectors, car detector, and face detector) provided in ccv's ./samples has changed from the [BSD 3-clause license](https://raw.github.com/liuliu/ccv/unstable/COPYING) to the [Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/), in the hope that this clearer license will help the adoption of these trained data models.

As always, the new image classifier is available through ccv's RESTful interface at http://localhost:3350/convnet/classify. You can also play with ccv at <http://docomputersdream.org/>

Since this version is an anomaly in terms of the release cycle, the next two versions of ccv will be devoted to bugfixes and performance improvements. There is also a plan to enter ILSVRC 2014 and publish results on FDDB and LFW for the sake of keeping ccv's implementation fresh and competitive.

Other changes / bugfixes in ccv 0.6:

1). Moved from hand-written configure script to autoconf (which still provides link / flag information);

2). <http://ci.libccv.org/> is online to monitor builds for the unstable branch and to keep it free of static analyzer reports: <http://ci.libccv.org/analyze/>;

3). Fixed a bug in ./serve/ccv where the returned HTTP header claimed to be 1.1 but the connection was never kept open;

4). RESTful interface for SWT added Tesseract (OCR) support;

5). Fixed an ICF implementation problem with non-standard floating point representation;

6). Fixed a multi-threading bug in fftw3 usage.
8 changes: 5 additions & 3 deletions site/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ <h2>Modern Computer Vision Algorithms</h2>
<a href="/doc/doc-bbf">a very fast detection algorithm for rigid object</a>
(face etc.), <a href="/doc/doc-icf">a strong rigid object detection algorithm</a>
(pedestrian etc.), <a href="/doc/doc-dpm">an accurate object detection algorithm
for somewhat difficult object</a> (car, cat etc.), <a href="/doc/doc-swt">a
state-of-art text detection algorithm</a>, <a href="/doc/doc-tld">a long term
for somewhat difficult object</a> (car, cat etc.), <a href="/doc/doc-convnet">a
deep-learning based near state-of-the-art image classifier</a>, <a href="/doc/doc-swt">a
state-of-the-art text detection algorithm</a>, <a href="/doc/doc-tld">a long term
object tracking algorithm</a>, and <a href="/doc/doc-sift">the long-standing
feature point detection algorithm</a>.</p>
<p>For computer vision community, there is no shortage of good algorithms, good
Expand All @@ -38,7 +39,8 @@ <h2>Modern Computer Vision Algorithms</h2>
shining but Matlab algorithms. ccv is my take on this problem, hope you enjoy
it.</p>
<h2>License</h2>
<p>ccv is distributed under <a href="https://raw.github.com/liuliu/ccv/unstable/COPYING">BSD 3-clause license</a>.<p>
<p>ccv source code is distributed under <a href="https://raw.github.com/liuliu/ccv/unstable/COPYING">BSD 3-clause License</a>.</p>
<p>ccv's data models and documentation are distributed under <a href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</a>.</p>
<h2><a href="/galaxy-guide">Getting Started</a></h2>
<h2><a href="/doc">Documentation</a></h2>
<h2><a href="/lib">Library Reference</a></h2>
Expand Down
Binary file added site/photo/2014-03-27-dex.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 38db768

Please sign in to comment.