added a new post and updated cifar-10 to be runnable

archsh · Mar 27, 2014 · 38db768 · 38db768
1 parent 3c247ce
commit 38db768
Show file tree

Hide file tree

Showing 8 changed files with 112 additions and 74 deletions.
diff --git a/bin/cifar-10.c b/bin/cifar-10.c
@@ -14,6 +14,7 @@ int main(int argc, char** argv)
 					.rows = 31,
 					.cols = 31,
 					.channels = 3,
+					.partition = 1,
 				},
 			},
 			.output = {
@@ -24,41 +25,44 @@ int main(int argc, char** argv)
 					.border = 2,
 					.strides = 1,
 					.count = 32,
+					.partition = 1,
 				},
 			},
 		},
 		{
-			.type = CCV_CONVNET_MAX_POOL,
+			.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
 			.input = {
 				.matrix = {
 					.rows = 31,
 					.cols = 31,
 					.channels = 32,
+					.partition = 1,
 				},
 			},
 			.output = {
-				.pool = {
+				.rnorm = {
 					.size = 3,
-					.strides = 2,
-					.border = 0,
+					.kappa = 1,
+					.alpha = 1e-4,
+					.beta = 0.75,
 				},
 			},
 		},
 		{
-			.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
+			.type = CCV_CONVNET_MAX_POOL,
 			.input = {
 				.matrix = {
-					.rows = 15,
-					.cols = 15,
+					.rows = 31,
+					.cols = 31,
 					.channels = 32,
+					.partition = 1,
 				},
 			},
 			.output = {
-				.rnorm = {
+				.pool = {
 					.size = 3,
-					.kappa = 1,
-					.alpha = 0.0001,
-					.beta = 0.75,
+					.strides = 2,
+					.border = 0,
 				},
 			},
 		},
@@ -71,6 +75,7 @@ int main(int argc, char** argv)
 					.rows = 15,
 					.cols = 15,
 					.channels = 32,
+					.partition = 1,
 				},
 			},
 			.output = {
@@ -81,41 +86,44 @@ int main(int argc, char** argv)
 					.border = 2,
 					.strides = 1,
 					.count = 32,
+					.partition = 1,
 				},
 			},
 		},
 		{
-			.type = CCV_CONVNET_AVERAGE_POOL,
+			.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
 			.input = {
 				.matrix = {
 					.rows = 15,
 					.cols = 15,
 					.channels = 32,
+					.partition = 1,
 				},
 			},
 			.output = {
-				.pool = {
+				.rnorm = {
 					.size = 3,
-					.strides = 2,
-					.border = 0,
+					.kappa = 1,
+					.alpha = 1e-4,
+					.beta = 0.75,
 				},
 			},
 		},
 		{
-			.type = CCV_CONVNET_LOCAL_RESPONSE_NORM,
+			.type = CCV_CONVNET_AVERAGE_POOL,
 			.input = {
 				.matrix = {
-					.rows = 7,
-					.cols = 7,
+					.rows = 15,
+					.cols = 15,
 					.channels = 32,
+					.partition = 1,
 				},
 			},
 			.output = {
-				.rnorm = {
+				.pool = {
 					.size = 3,
-					.kappa = 1,
-					.alpha = 0.0001,
-					.beta = 0.75,
+					.strides = 2,
+					.border = 0,
 				},
 			},
 		},
@@ -128,6 +136,7 @@ int main(int argc, char** argv)
 					.rows = 7,
 					.cols = 7,
 					.channels = 32,
+					.partition = 1,
 				},
 			},
 			.output = {
@@ -138,6 +147,7 @@ int main(int argc, char** argv)
 					.border = 2,
 					.strides = 1,
 					.count = 64,
+					.partition = 1,
 				},
 			},
 		},
@@ -148,6 +158,7 @@ int main(int argc, char** argv)
 					.rows = 7,
 					.cols = 7,
 					.channels = 64,
+					.partition = 1,
 				},
 			},
 			.output = {
@@ -167,19 +178,21 @@ int main(int argc, char** argv)
 					.rows = 3,
 					.cols = 3,
 					.channels = 64,
+					.partition = 1,
 				},
 				.node = {
 					.count = 3 * 3 * 64,
 				},
 			},
 			.output = {
 				.full_connect = {
+					.relu = 0,
 					.count = 10,
 				},
 			},
 		},
 	};
-	ccv_convnet_t* convnet = ccv_convnet_new(1, ccv_size(31, 31), params, sizeof(params) / sizeof(ccv_convnet_layer_param_t));
+	ccv_convnet_t* convnet = ccv_convnet_new(1, ccv_size(32, 32), params, sizeof(params) / sizeof(ccv_convnet_layer_param_t));
 	assert(ccv_convnet_verify(convnet, 10) == 0);
 	assert(argc == 5);
 	int num1 = atoi(argv[2]);
@@ -195,18 +208,18 @@ int main(int argc, char** argv)
 		{
 			fread(bytes, 32 * 32 + 1, 1, r1);
 			int c = bytes[0];
-			ccv_dense_matrix_t* a = ccv_dense_matrix_new(31, 31, CCV_32F | CCV_C3, 0, 0);
-			for (i = 0; i < 31; i++)
-				for (j = 0; j < 31; j++)
-					a->data.f32[(j + i * 31) * 3] = bytes[j + i * 32 + 1] / 255.0 * 2 - 1;
+			ccv_dense_matrix_t* a = ccv_dense_matrix_new(32, 32, CCV_32F | CCV_C3, 0, 0);
+			for (i = 0; i < 32; i++)
+				for (j = 0; j < 32; j++)
+					a->data.f32[(j + i * 32) * 3] = bytes[j + i * 32 + 1];
 			fread(bytes, 32 * 32, 1, r1);
-			for (i = 0; i < 31; i++)
-				for (j = 0; j < 31; j++)
-					a->data.f32[(j + i * 31) * 3 + 1] = bytes[j + i * 32] / 255.0 * 2 - 1;
+			for (i = 0; i < 32; i++)
+				for (j = 0; j < 32; j++)
+					a->data.f32[(j + i * 32) * 3 + 1] = bytes[j + i * 32];
 			fread(bytes, 32 * 32, 1, r1);
-			for (i = 0; i < 31; i++)
-				for (j = 0; j < 31; j++)
-					a->data.f32[(j + i * 31) * 3 + 2] = bytes[j + i * 32] / 255.0 * 2 - 1;
+			for (i = 0; i < 32; i++)
+				for (j = 0; j < 32; j++)
+					a->data.f32[(j + i * 32) * 3 + 2] = bytes[j + i * 32];
 			ccv_categorized_t categorized = ccv_categorized(c, a, 0);
 			ccv_array_push(categorizeds, &categorized);
 		}
@@ -215,47 +228,47 @@ int main(int argc, char** argv)
 		{
 			fread(bytes, 32 * 32 + 1, 1, r2);
 			int c = bytes[0];
-			ccv_dense_matrix_t* a = ccv_dense_matrix_new(31, 31, CCV_32F | CCV_C3, 0, 0);
-			for (i = 0; i < 31; i++)
-				for (j = 0; j < 31; j++)
-					a->data.f32[(j + i * 31) * 3] = bytes[j + i * 32 + 1] / 255.0 * 2 - 1;
+			ccv_dense_matrix_t* a = ccv_dense_matrix_new(32, 32, CCV_32F | CCV_C3, 0, 0);
+			for (i = 0; i < 32; i++)
+				for (j = 0; j < 32; j++)
+					a->data.f32[(j + i * 32) * 3] = bytes[j + i * 32 + 1];
 			fread(bytes, 32 * 32, 1, r2);
-			for (i = 0; i < 31; i++)
-				for (j = 0; j < 31; j++)
-					a->data.f32[(j + i * 31) * 3 + 1] = bytes[j + i * 32] / 255.0 * 2 - 1;
+			for (i = 0; i < 32; i++)
+				for (j = 0; j < 32; j++)
+					a->data.f32[(j + i * 32) * 3 + 1] = bytes[j + i * 32];
 			fread(bytes, 32 * 32, 1, r2);
-			for (i = 0; i < 31; i++)
-				for (j = 0; j < 31; j++)
-					a->data.f32[(j + i * 31) * 3 + 2] = bytes[j + i * 32] / 255.0 * 2 - 1;
+			for (i = 0; i < 32; i++)
+				for (j = 0; j < 32; j++)
+					a->data.f32[(j + i * 32) * 3 + 2] = bytes[j + i * 32];
 			ccv_categorized_t categorized = ccv_categorized(c, a, 0);
 			ccv_array_push(tests, &categorized);
 		}
 		ccv_convnet_layer_train_param_t layer_params[9];
 		memset(layer_params, 0, sizeof(layer_params));
 
 		layer_params[0].w.decay = 0.005;
-		layer_params[0].w.learn_rate = 0.0005;
+		layer_params[0].w.learn_rate = 0.001;
 		layer_params[0].w.momentum = 0.9;
 		layer_params[0].bias.decay = 0;
 		layer_params[0].bias.learn_rate = 0.001;
 		layer_params[0].bias.momentum = 0.9;
 
 		layer_params[3].w.decay = 0.005;
-		layer_params[3].w.learn_rate = 0.0005;
+		layer_params[3].w.learn_rate = 0.001;
 		layer_params[3].w.momentum = 0.9;
 		layer_params[3].bias.decay = 0;
 		layer_params[3].bias.learn_rate = 0.001;
 		layer_params[3].bias.momentum = 0.9;
 
 		layer_params[6].w.decay = 0.005;
-		layer_params[6].w.learn_rate = 0.0005;
+		layer_params[6].w.learn_rate = 0.001;
 		layer_params[6].w.momentum = 0.9;
 		layer_params[6].bias.decay = 0;
 		layer_params[6].bias.learn_rate = 0.001;
 		layer_params[6].bias.momentum = 0.9;
 
 		layer_params[8].w.decay = 0.01;
-		layer_params[8].w.learn_rate = 0.0005;
+		layer_params[8].w.learn_rate = 0.001;
 		layer_params[8].w.momentum = 0.9;
 		layer_params[8].bias.decay = 0;
 		layer_params[8].bias.learn_rate = 0.001;
@@ -265,6 +278,8 @@ int main(int argc, char** argv)
 			.max_epoch = 999,
 			.mini_batch = 128,
 			.iterations = 500,
+			.symmetric = 1,
+			.color_gain = 0,
 			.layer_params = layer_params,
 		};
 		ccv_convnet_supervised_train(convnet, categorizeds, tests, "cifar-10.sqlite3", params);

diff --git a/doc/convnet.md b/doc/convnet.md
@@ -67,8 +67,8 @@ than Alex's result, the top-5 missing rate is 16.26%, 0.74% better than Alex's.
 image-net.sqlite3 (the one included in ./samples/), the top-1 missing rate is 36.83% and the top-5
 missing rate is 16.25%.
 
-For 32-bit float point image-net.sqlite3 on CPU, the top-1 missing rate is 37.34%, and the top-5
-missing rate is 16.62%.
+For 32-bit float point image-net.sqlite3 on CPU, the top-1 missing rate is 37.31%, and the top-5
+missing rate is 16.55%.
 
 You can download the 32-bit float point one with ./samples/download-image-net.sh
 

diff --git a/serve/convnet.c b/serve/convnet.c
@@ -184,26 +184,10 @@ int uri_convnet_classify(const void* context, const void* parsed, ebb_buf* buf)
 		free(parser);
 		return -1;
 	}
-	ccv_dense_matrix_t* norm = 0;
 	ccv_convnet_t* convnet = parser->convnet_and_words->convnet;
-	if (image->rows > convnet->input.height && image->cols > parser->convnet_and_words->convnet->input.width)
-		ccv_resample(image, &norm, 0, ccv_max(convnet->input.height, (int)(image->rows * (float)convnet->input.height / image->cols + 0.5)), ccv_max(convnet->input.width, (int)(image->cols * (float)convnet->input.width / image->rows + 0.5)), CCV_INTER_AREA);
-	else if (image->rows < convnet->input.height || image->cols < convnet->input.width)
-		ccv_resample(image, &norm, 0, ccv_max(convnet->input.height, (int)(image->rows * (float)convnet->input.height / image->cols + 0.5)), ccv_max(convnet->input.width, (int)(image->cols * (float)convnet->input.width / image->rows + 0.5)), CCV_INTER_CUBIC);
-	else
-		norm = image;
-	if (norm != image)
-		ccv_matrix_free(image);
 	ccv_dense_matrix_t* input = 0;
-	if (norm->cols != convnet->input.width || norm->rows != convnet->input.height)
-	{
-		int x = (norm->cols - convnet->input.width + 1) / 2;
-		int y =  (norm->rows - convnet->input.height + 1) / 2;
-		assert(x == 0 || y == 0);
-		ccv_slice(norm, (ccv_matrix_t**)&input, CCV_32F, y, x, convnet->input.height, convnet->input.width);
-	} else
-		ccv_shift(norm, (ccv_matrix_t**)&input, CCV_32F, 0, 0); // converting to 32f
-	ccv_matrix_free(norm);
+	ccv_convnet_input_formation(convnet, image, &input);
+	ccv_matrix_free(image);
 	ccv_array_t* rank = 0;
 	ccv_convnet_classify(convnet, &input, 1, &rank, parser->top, 1);
 	// print out

diff --git a/site/_posts/0000-01-01-doc-convnet.markdown b/site/_posts/0000-01-01-doc-convnet.markdown
@@ -76,8 +76,8 @@ than Alex's result, the top-5 missing rate is 16.26%, 0.74% better than Alex's.
 image-net.sqlite3 (the one included in ./samples/), the top-1 missing rate is 36.83% and the top-5
 missing rate is 16.25%.
 
-For 32-bit float point image-net.sqlite3 on CPU, the top-1 missing rate is 37.34%, and the top-5
-missing rate is 16.62%.
+For 32-bit float point image-net.sqlite3 on CPU, the top-1 missing rate is 37.31%, and the top-5
+missing rate is 16.55%.
 
 You can download the 32-bit float point one with ./samples/download-image-net.sh
 
@@ -105,7 +105,7 @@ within 6 days on two GeForce 580, which suggests my time is within line of these
 As a preliminary implementation, I didn't spend enough time to optimize these operations in ccv if
 any at all. For example, [cuda-convnet](http://code.google.com/p/cuda-convnet/) implements its
 functionalities in about 10,000 lines of code, Caffe implements with 14,000 lines of code, as of
-this release, ccv implements with about 3,700 lines of code. For the future, the low-hanging
+this release, ccv implements with about 4,300 lines of code. For the future, the low-hanging
 optimization opportunities include using SIMD instruction, doing FFT in densely convolved layers
 etc.
 

diff --git a/...-sources-near-state-of-the-art-image-classifier-under-creative-commons.markdown b/...-sources-near-state-of-the-art-image-classifier-under-creative-commons.markdown
@@ -0,0 +1,37 @@
+---
+date: '2014-03-27 07:35:00'
+layout: post
+slug: ccv-0.6-open-sources-near-state-of-the-art-image-classifier-under-creative-commons
+status: publish
+title: ccv 0.6 open sources near state-of-the-art image classifier under Creative Commons
+categories:
+- post
+---
+
+In previous posts, I mentioned that even-numbered releases would be devoted to bugfixes. However, 0.6 is a bit different.
+
+![now-go-back-and-play-forza](/photo/2014-03-27-dex.png)
+
+For the past year and a half, deep learning, particularly deep convolutional neural network based image classification, has made waves in the vision community. For a library aiming at providing state-of-the-art implementations, it would be frustrating not to have a competent image classifier implemented more than a year after the ground-breaking work was published. In the meantime, a few open source libraries have provided complete ([Caffe](http://caffe.berkeleyvision.org/)) / incomplete ([OverFeat](http://cilvr.nyu.edu/doku.php?id=software:overfeat:start), [cuda-convnet](http://code.google.com/p/cuda-convnet/)) implementations of the said image classifier. However, all of them focus on research-related activities (see their licenses: [1](http://caffe.berkeleyvision.org/getting_pretrained_models.html), [2](https://github.com/sermanet/OverFeat/blob/master/LICENSE)). Thus, for the past 5 months, I've been working on an image classifier in ccv with a deep convolutional neural network.
+
+This version of ccv distributes an image classifier that is trained with the ILSVRC 2010 data set of 1000 classes, with top-1 missing rate at 36.83% and top-5 missing rate at 16.25%, and is thus close to the state-of-the-art image classifier (Clarifai in ILSVRC 2013 gets top-5 missing rate at 11.19%: <http://www.image-net.org/challenges/LSVRC/2013/results.php>; the 16.25% top-5 missing rate is reproduced with the ILSVRC 2010 test data set, which is known to be less challenging). [See more about this image classifier](/doc/doc-convnet).
+
+The license for these data models (the said image classifier, and pedestrian detectors, car detector, face detector) provided in ccv ./samples is changed from [BSD 3-clause license](https://raw.github.com/liuliu/ccv/unstable/COPYING) to [Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/) in the hope that this clearer license will help the adoption of these trained data models.
+
+As always, the new image classifier is available through ccv's RESTful interface at http://localhost:3350/convnet/classify. You can also play with ccv at <http://docomputersdream.org/>
+
+Since this version is an anomaly in terms of release cycle, the next two versions of ccv will be devoted to bugfixes and performance improvements. There is also a plan to enter ILSVRC 2014 and publish results on FDDB and LFW for the sake of keeping ccv's implementation fresh and competitive.
+
+Other changes / bugfixes in ccv 0.6:
+
+1). Moved from hand-written configure script to autoconf (which still provides link / flag information);
+
+2). <http://ci.libccv.org/> is online to monitor builds for unstable branch and free of static analyzer reports: <http://ci.libccv.org/analyze/>;
+
+3). Fixed a bug in ./serve/ccv where the returned HTTP header claimed to be 1.1 but the connection was never kept open;
+
+4). RESTful interface for SWT added Tesseract (OCR) support;
+
+5). Fixed ICF implementation problem with non-standard floating point representation;
+
+6). Fixed multi-thread bug with fftw3 usage;
diff --git a/site/index.html b/site/index.html
@@ -28,8 +28,9 @@ <h2>Modern Computer Vision Algorithms</h2>
 <a href="/doc/doc-bbf">a very fast detection algorithm for rigid object</a>
 (face etc.), <a href="/doc/doc-icf">a strong rigid object detetion algorithm</a>
 (pedestrian etc.), <a href="/doc/doc-dpm">an accurate object detection algorithm
-for somewhat difficult object</a> (car, cat etc.), <a href="/doc/doc-swt">a
-state-of-art text detection algorithm</a>, <a href="/doc/doc-tld">a long term
+for somewhat difficult object</a> (car, cat etc.), <a href="/doc/doc-convnet">a
+deep-learning based near state-of-the-art image classifier</a>, <a href="/doc/doc-swt">a
+state-of-the-art text detection algorithm</a>, <a href="/doc/doc-tld">a long term
 object tracking algorithm</a>, and <a href="/doc/doc-sift">the long-standing
 feature point detection algorithm</a>.</p>
 <p>For computer vision community, there is no shortage of good algorithms, good
@@ -38,7 +39,8 @@ <h2>Modern Computer Vision Algorithms</h2>
 shining but Matlab algorithms. ccv is my take on this problem, hope you enjoy
 it.</p>
 <h2>License</h2>
-<p>ccv is distributed under <a href="https://raw.github.com/liuliu/ccv/unstable/COPYING">BSD 3-clause license</a>.<p>
+<p>ccv source code is distributed under <a href="https://raw.github.com/liuliu/ccv/unstable/COPYING">BSD 3-clause License</a>.</p>
+<p>ccv's data models and documentation are distributed under <a href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</a>.</p>
 <h2><a href="/galaxy-guide">Getting Started</a></h2>
 <h2><a href="/doc">Documentation</a></h2>
 <h2><a href="/lib">Library Reference</a></h2>

diff --git a/site/photo/2014-03-27-dex.png b/site/photo/2014-03-27-dex.png