diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp
index de99bc3033f..29daf09f656 100644
--- a/include/caffe/vision_layers.hpp
+++ b/include/caffe/vision_layers.hpp
@@ -17,8 +17,34 @@ namespace caffe {
 
-/*
-ConcatLayer
+/* ArgMaxLayer
+  Compute the index of the max value across all (channels x height x width).
+  [In the future, can take specific dimension.]
+  Intended for use after a classification layer to produce a prediction.
+  If parameter out_max_val is set to true, then output is a vector of pairs
+  (max_ind, max_val) for each image.
+
+  NOTE: does not implement the Backward operation.
+*/
+template <typename Dtype>
+class ArgMaxLayer : public Layer<Dtype> {
+ public:
+  explicit ArgMaxLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+
+ protected:
+  virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+    NOT_IMPLEMENTED;
+  }
+  bool out_max_val_;
+};
+
+/* ConcatLayer
 Takes at least two blobs and concatenates them along either num or channel
 dim, outputting the result.
 */
@@ -107,6 +133,8 @@ class EltwiseProductLayer : public Layer<Dtype> {
       const bool propagate_down, vector<Blob<Dtype>*>* bottom);
 };
 
+/* FlattenLayer
+*/
 template <typename Dtype>
 class FlattenLayer : public Layer<Dtype> {
  public:
@@ -289,6 +317,8 @@ class MemoryDataLayer : public Layer<Dtype> {
   int pos_;
 };
 
+/* PoolingLayer
+*/
 template <typename Dtype>
 class PoolingLayer : public Layer<Dtype> {
  public:
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index 2991c81f559..ae15ba5bb44 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -24,6 +24,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
   switch (type) {
   case LayerParameter_LayerType_ACCURACY:
     return new AccuracyLayer<Dtype>(param);
+  case LayerParameter_LayerType_ARGMAX:
+    return new ArgMaxLayer<Dtype>(param);
   case LayerParameter_LayerType_BNLL:
     return new BNLLLayer<Dtype>(param);
   case LayerParameter_LayerType_CONCAT:
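For orientation, here is a minimal sketch of how the new factory case is reached; it assumes the protobuf-generated setter for LayerParameter's `type` field (not shown in this hunk) and the templated `GetLayer<Dtype>` entry point above:

    // Sketch only, not part of the patch: constructing an ArgMaxLayer via the factory.
    LayerParameter param;
    param.set_type(LayerParameter_LayerType_ARGMAX);
    Layer<float>* layer = GetLayer<float>(param);  // dispatches to new ArgMaxLayer<float>(param)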
diff --git a/src/caffe/layers/argmax_layer.cpp b/src/caffe/layers/argmax_layer.cpp
new file mode 100644
index 00000000000..e04588d4baa
--- /dev/null
+++ b/src/caffe/layers/argmax_layer.cpp
@@ -0,0 +1,57 @@
+// Copyright 2014 BVLC and contributors.
+
+#include <cfloat>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/vision_layers.hpp"
+
+
+namespace caffe {
+
+template <typename Dtype>
+void ArgMaxLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 1) << "ArgMaxLayer takes a single blob as input.";
+  CHECK_EQ(top->size(), 1) << "ArgMaxLayer produces a single blob as output.";
+  out_max_val_ = this->layer_param_.argmax_param().out_max_val();
+  if (out_max_val_) {
+    // Produces max_ind and max_val
+    (*top)[0]->Reshape(bottom[0]->num(), 2, 1, 1);
+  } else {
+    // Produces only max_ind
+    (*top)[0]->Reshape(bottom[0]->num(), 1, 1, 1);
+  }
+}
+
+template <typename Dtype>
+Dtype ArgMaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+    vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  Dtype* top_data = (*top)[0]->mutable_cpu_data();
+  int num = bottom[0]->num();
+  int dim = bottom[0]->count() / bottom[0]->num();
+  for (int i = 0; i < num; ++i) {
+    // Find the index (and value) of the maximum over this image's dim values.
+    Dtype max_val = -FLT_MAX;
+    int max_ind = 0;
+    for (int j = 0; j < dim; ++j) {
+      if (bottom_data[i * dim + j] > max_val) {
+        max_val = bottom_data[i * dim + j];
+        max_ind = j;
+      }
+    }
+    if (out_max_val_) {
+      top_data[i * 2] = max_ind;
+      top_data[i * 2 + 1] = max_val;
+    } else {
+      top_data[i] = max_ind;
+    }
+  }
+  return Dtype(0);
+}
+
+INSTANTIATE_CLASS(ArgMaxLayer);
+
+
+}  // namespace caffe
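The Forward_cpu loop above is a plain per-image argmax over the flattened dim = channels * height * width values. For reference, an equivalent standalone scan over a row-major array using std::max_element (a sketch for illustration; `argmax_rows` is not part of the patch):

    #include <algorithm>

    // For each of the num rows of a row-major num x dim array, record the index
    // and value of its maximum element, mirroring ArgMaxLayer::Forward_cpu.
    // std::max_element returns the first maximum, matching the strict '>' above.
    void argmax_rows(const float* data, int num, int dim,
                     int* max_ind, float* max_val) {
      for (int i = 0; i < num; ++i) {
        const float* row = data + i * dim;
        const float* it = std::max_element(row, row + dim);
        max_ind[i] = static_cast<int>(it - row);
        max_val[i] = *it;
      }
    }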
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index ce2f25b8db5..2d9a1aa8519 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -112,26 +112,29 @@ message SolverState {
   repeated BlobProto history = 3; // The history for sgd solvers
 }
 
+// NOTE
 // Update the next available ID when you add a new LayerParameter field.
 //
-// LayerParameter next available ID: 23 (last added: memory_data_param)
+// LayerParameter next available ID: 24 (last added: argmax_param)
 message LayerParameter {
   repeated string bottom = 2; // the name of the bottom blobs
   repeated string top = 3; // the name of the top blobs
   optional string name = 4; // the layer name
 
+  // NOTE
   // Add new LayerTypes to the enum below in lexicographical order (other than
   // starting with NONE), starting with the next available ID in the comment
   // line above the enum. Update the next available ID when you add a new
   // LayerType.
   //
-  // LayerType next available ID: 30 (last added: MEMORY_DATA)
+  // LayerType next available ID: 31 (last added: ARGMAX)
   enum LayerType {
     // "NONE" layer type is 0th enum element so that we don't cause confusion
     // by defaulting to an existent LayerType (instead, should usually error if
     // the type is unspecified).
     NONE = 0;
     ACCURACY = 1;
+    ARGMAX = 30;
     BNLL = 2;
     CONCAT = 3;
     CONVOLUTION = 4;
@@ -172,6 +175,7 @@ message LayerParameter {
   repeated float weight_decay = 8;
 
   // Parameters for particular layer types.
+  optional ArgMaxParameter argmax_param = 23;
   optional ConcatParameter concat_param = 9;
   optional ConvolutionParameter convolution_param = 10;
   optional DataParameter data_param = 11;
@@ -193,6 +197,12 @@ message LayerParameter {
   optional V0LayerParameter layer = 1;
 }
 
+// Message that stores parameters used by ArgMaxLayer
+message ArgMaxParameter {
+  // If true produce pairs (argmax, maxval)
+  optional bool out_max_val = 1 [default = false];
+}
+
 // Message that stores parameters used by ConcatLayer
 message ConcatParameter {
   // Concat Layer needs to specify the dimension along which the concat will happen,
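With the ARGMAX type and ArgMaxParameter in place, the layer can be declared in a net definition. A hypothetical prototxt snippet (the layer and blob names here are made up; "prob" would typically be the output of a softmax layer):

    layers {
      name: "prediction"
      type: ARGMAX
      bottom: "prob"
      top: "prediction"
      argmax_param {
        # top blob becomes num x 2 x 1 x 1: (max_ind, max_val) per image
        out_max_val: true
      }
    }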
diff --git a/src/caffe/test/test_argmax_layer.cpp b/src/caffe/test/test_argmax_layer.cpp
new file mode 100644
index 00000000000..ab2d75b322c
--- /dev/null
+++ b/src/caffe/test/test_argmax_layer.cpp
@@ -0,0 +1,113 @@
+// Copyright 2014 BVLC and contributors.
+
+#include <vector>
+
+#include "cuda_runtime.h"
+#include "gtest/gtest.h"
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+
+namespace caffe {
+
+extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+
+template <typename Dtype>
+class ArgMaxLayerTest : public ::testing::Test {
+ protected:
+  ArgMaxLayerTest()
+      : blob_bottom_(new Blob<Dtype>(20, 10, 1, 1)),
+        blob_top_(new Blob<Dtype>()) {
+    Caffe::set_random_seed(1701);
+    // fill the values
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+  virtual ~ArgMaxLayerTest() { delete blob_bottom_; delete blob_top_; }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(ArgMaxLayerTest, Dtypes);
+
+
+TYPED_TEST(ArgMaxLayerTest, TestSetup) {
+  LayerParameter layer_param;
+  ArgMaxLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_->num());
+  EXPECT_EQ(this->blob_top_->channels(), 1);
+}
+
+TYPED_TEST(ArgMaxLayerTest, TestSetupMaxVal) {
+  LayerParameter layer_param;
+  ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param();
+  argmax_param->set_out_max_val(true);
+  ArgMaxLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_->num());
+  EXPECT_EQ(this->blob_top_->channels(), 2);
+}
+
+TYPED_TEST(ArgMaxLayerTest, TestCPU) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  ArgMaxLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // Now, check values
+  const TypeParam* bottom_data = this->blob_bottom_->cpu_data();
+  const TypeParam* top_data = this->blob_top_->cpu_data();
+  int max_ind;
+  TypeParam max_val;
+  int num = this->blob_bottom_->num();
+  int dim = this->blob_bottom_->count() / num;
+  for (int i = 0; i < num; ++i) {
+    EXPECT_GE(top_data[i], 0);
+    EXPECT_LT(top_data[i], dim);
+    max_ind = top_data[i];
+    max_val = bottom_data[i * dim + max_ind];
+    for (int j = 0; j < dim; ++j) {
+      EXPECT_LE(bottom_data[i * dim + j], max_val);
+    }
+  }
+}
+
+TYPED_TEST(ArgMaxLayerTest, TestCPUMaxVal) {
+  LayerParameter layer_param;
+  Caffe::set_mode(Caffe::CPU);
+  ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param();
+  argmax_param->set_out_max_val(true);
+  ArgMaxLayer<TypeParam> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // Now, check values
+  const TypeParam* bottom_data = this->blob_bottom_->cpu_data();
+  const TypeParam* top_data = this->blob_top_->cpu_data();
+  int max_ind;
+  TypeParam max_val;
+  int num = this->blob_bottom_->num();
+  int dim = this->blob_bottom_->count() / num;
+  for (int i = 0; i < num; ++i) {
+    EXPECT_GE(top_data[i * 2], 0);
+    EXPECT_LT(top_data[i * 2], dim);
+    max_ind = top_data[i * 2];
+    max_val = top_data[i * 2 + 1];
+    EXPECT_EQ(bottom_data[i * dim + max_ind], max_val);
+    for (int j = 0; j < dim; ++j) {
+      EXPECT_LE(bottom_data[i * dim + j], max_val);
+    }
+  }
+}
+
+}  // namespace caffe
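As TestCPUMaxVal verifies, with out_max_val set the top blob is num x 2 x 1 x 1 and interleaves the pair per image. A consumer-side sketch of reading that layout (`top_blob` and `read_predictions` are placeholder names, not part of the patch):

    // Read the (max_ind, max_val) pairs produced when out_max_val is true.
    void read_predictions(const Blob<float>& top_blob) {
      const float* top = top_blob.cpu_data();
      for (int i = 0; i < top_blob.num(); ++i) {
        const int max_ind = static_cast<int>(top[i * 2]);  // argmax over c*h*w
        const float max_val = top[i * 2 + 1];              // value at that index
        // use (max_ind, max_val) as the prediction for image i
      }
    }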