comment out image generation part of tld

added tld.md doc.
goje87 · Nov 9, 2012 · 023e644 · 023e644
1 parent 5410015
commit 023e644
Show file tree

Hide file tree

Showing 3 changed files with 114 additions and 5 deletions.
diff --git a/bin/tld.c b/bin/tld.c
@@ -16,9 +16,10 @@ int main(int argc, char** argv)
 #ifdef HAVE_SWSCALE
 	assert(argc == 6);
 	ccv_rect_t box = ccv_rect(atoi(argv[2]), atoi(argv[3]), atoi(argv[4]), atoi(argv[5]));
-	box.width = box.width - box.x + 1;
-	box.height = box.height - box.y + 1;
-	printf("%d,%d,%d,%d,%f\n", box.x, box.y, box.width + box.x - 1, box.height + box.y - 1, 1.0f);
+	// box.width = box.width - box.x + 1;
+	// box.height = box.height - box.y + 1;
+	// printf("%d,%d,%d,%d,%f\n", box.x, box.y, box.width + box.x - 1, box.height + box.y - 1, 1.0f);
+	printf("%05d: %d %d %d %d %f\n", 0, box.x, box.y, box.width, box.height, 1.0f);
 	// init av-related structs
 	AVFormatContext* ic = 0;
 	int video_stream = -1;
@@ -80,7 +81,8 @@ int main(int argc, char** argv)
 		ccv_read(rgb_picture.data[0], &y, CCV_IO_RGB_RAW | CCV_IO_GRAY, video_st->codec->height, video_st->codec->width, rgb_picture.linesize[0]);
 		ccv_tld_info_t info;
 		ccv_comp_t newbox = ccv_tld_track_object(tld, x, y, &info);
-		printf("%04d: performed learn: %d, performed track: %d, successfully track: %d; %d passed fern detector, %d passed nnc detector, %d merged, %d confident matches, %d close matches\n", tld->count, info.perform_learn, info.perform_track, info.track_success, info.ferns_detects, info.nnc_detects, info.clustered_detects, info.confident_matches, info.close_matches);
+		/*
+		// printf("%04d: performed learn: %d, performed track: %d, successfully track: %d; %d passed fern detector, %d passed nnc detector, %d merged, %d confident matches, %d close matches\n", tld->count, info.perform_learn, info.perform_track, info.track_success, info.ferns_detects, info.nnc_detects, info.clustered_detects, info.confident_matches, info.close_matches);
 		ccv_dense_matrix_t* image = 0;
 		ccv_read(rgb_picture.data[0], &image, CCV_IO_RGB_RAW | CCV_IO_RGB_COLOR, video_st->codec->height, video_st->codec->width, rgb_picture.linesize[0]);
 		// draw out
@@ -120,6 +122,12 @@ int main(int argc, char** argv)
 			printf("%d,%d,%d,%d,%f\n", newbox.rect.x, newbox.rect.y, newbox.rect.width + newbox.rect.x - 1, newbox.rect.height + newbox.rect.y - 1, newbox.confidence);
 		else
 			printf("NaN,NaN,NaN,NaN,NaN\n");
+		*/
+		if (tld->found)
+			printf("%05d: %d %d %d %d %f\n", tld->count, newbox.rect.x, newbox.rect.y, newbox.rect.width, newbox.rect.height, newbox.confidence);
+		else
+			printf("--------------\n");
+
 		x = y;
 		y = 0;
 	}

diff --git a/doc/tld.md b/doc/tld.md
@@ -0,0 +1,101 @@
+TLD: Track Learn Detect
+=======================
+
+What's TLD?
+-----------
+
+This algorithm, also known as the "Predator" algorithm, was developed by Zdenek Kalal.
+For more information, please visit his homepage: http://info.ee.surrey.ac.uk/Personal/Z.Kalal/tld.html
+
+How does it work?
+-----------------
+
+This is a long story; please read Zdenek's paper. Here is how to run it from the
+command line if you compiled ccv with FFmpeg support:
+
+	./tld <Your Video> x y width height
+
+It will output the tracking coordinates for each frame.
+
+What about performance?
+-----------------------
+
+TLD is implemented closely after Zdenek's paper, but it still varies significantly
+in quite a few aspects. I've done extensive tests to make sure the performance, in
+terms of accuracy and speed, matches the original implementation.
+
+Accuracy-wise:
+
+TLD uses a randomized algorithm, so the result can vary from run to run.
+I ran ccv's TLD implementation on test videos with "rotation == 0" and
+default parameters. By taking the median of 3 runs, I was able to generate
+some meaningful data to analyze.
+
+On motocross.mpg:
+
+	detections : 774
+	true detections : 1412
+	correct detections : 710
+	precision : 0.917313
+	recall : 0.502833
+	f-measure : 0.649588
+
+The result on the same video reported in: Zdenek Kalal, Jiri Matas and Krystian Mikolajczyk, Online Learning of Robust Object Detectors during Unstable Tracking:
+
+	precision : 0.96
+	recall : 0.54
+
+On pedestrian3.mpg:
+
+	After 69th frame failed to recover (out of 140 frames)
+
+The result on the same video reported in: Zdenek Kalal, Jiri Matas and Krystian Mikolajczyk, P-N Learning: Bootstrapping Binary Classifiers by Structural Constraints:
+
+	After 27th frame failed to recover (out of 140 frames)
+
+Note that in a few runs I got results that outperform Zdenek's implementation,
+but I chose to ignore these.
+
+All these results are obtained with alantrrs' evaluate_vis.py script in https://github.com/alantrrs/OpenTLD/blob/master/datasets/evaluate_vis.py and the dataset in
+that repository. Thanks alantrrs!
+
+Speed-wise:
+
+By enabling the "rotation" technique, you can achieve near real-time performance on QVGA
+video, with minor accuracy loss. With "rotation == 1" (default parameter), TLD
+spends around 15ms on tracking, 50ms on detecting, and 50ms on learning for 320x240
+video on a single thread of an i7-2620M 2.7GHz.
+
+Under the hood?
+---------------
+
+ccv's TLD implementation varies from Zdenek's original Matlab implementation in
+several significant ways:
+
+1). Tracking:
+
+Zdenek's implementation uses a smaller LK window for computation (5x5), whereas
+ccv's implementation uses a 15x15 window for such.
+
+2). Ferns Detection (Random Forest):
+
+Zdenek's implementation uses a random forest for object detection (in short, the
+probabilities for each feature add up), whereas ccv's implementation uses ferns
+for object detection (using multiplication of probabilities, A.K.A. semi-naive
+Bayes classifier). To compensate for this choice, ccv's implementation uses 40 ferns,
+and for each fern, uses 18 features (the default parameter), and the default
+ferns threshold for ccv's implementation is 0.
+
+3). Nearest-neighbor Classifier:
+
+Zdenek's implementation uses aspect-ratio normalized examples (15x15); these
+examples are normalized so that a simple multiply can yield correlation confidence.
+ccv's implementation uses aspect-aware examples (constrained to an area size of 400);
+examples are left as they are, and a normalized coefficient computation is used to
+get the confidence score.
+
+4). Pseudo-random Number Generator:
+
+Zdenek's implementation uses srand() for random number generation, and seeds it
+with 0. ccv's implementation uses a Mersenne-Twister random number generator with
+an environment-dependent seed.
diff --git a/lib/ccv_tld.c b/lib/ccv_tld.c
@@ -15,7 +15,7 @@ const ccv_tld_param_t ccv_tld_default_params = {
 	.interval = 3,
 	.shift = 0.1,
 	.top_n = 100,
-	.rotation = 1,
+	.rotation = 0,
 	.include_overlap = 0.7,
 	.exclude_overlap = 0.2,
 	.structs = 40,