Commit
Merge pull request BVLC#421 from sguada/argmax_layer
sergeyk authored and shelhamer committed May 21, 2014
2 parents c30bb24 + 4d52ca7 commit efbea35
Showing 5 changed files with 216 additions and 4 deletions.
34 changes: 32 additions & 2 deletions include/caffe/vision_layers.hpp
@@ -17,8 +17,34 @@

namespace caffe {

/*
ConcatLayer
/* ArgMaxLayer
Compute the index of the max value across all (channels x height x width).
[In the future, can take specific dimension.]
Intended for use after a classification layer to produce a prediction.
If the parameter out_max_val is set to true, the output is a vector of pairs
(max_ind, max_val) for each image.
NOTE: does not implement the Backward operation.
*/
template <typename Dtype>
class ArgMaxLayer : public Layer<Dtype> {
public:
explicit ArgMaxLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);

protected:
virtual Dtype Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
NOT_IMPLEMENTED;
}
bool out_max_val_;
};

/* ConcatLayer
Takes at least two blobs and concatenates them along either num or
channel dim, outputting the result.
*/
@@ -107,6 +133,8 @@ class EltwiseProductLayer : public Layer<Dtype> {
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
};

/* FlattenLayer
*/
template <typename Dtype>
class FlattenLayer : public Layer<Dtype> {
public:
@@ -289,6 +317,8 @@ class MemoryDataLayer : public Layer<Dtype> {
int pos_;
};

/* PoolingLayer
*/
template <typename Dtype>
class PoolingLayer : public Layer<Dtype> {
public:
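As a usage illustration for the new ArgMaxLayer declared above: in a net definition it would typically follow a softmax or other score-producing layer and convert per-image scores into a predicted class index. The fragment below is a hypothetical prototxt snippet in this version's layers/LayerType format; the layer and blob names ("prob", "prediction") are placeholders, not part of this change.

layers {
  name: "prediction"
  type: ARGMAX
  bottom: "prob"
  top: "prediction"
  argmax_param {
    # Set to true to also emit the max value alongside the index for each image.
    out_max_val: false
  }
}
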
2 changes: 2 additions & 0 deletions src/caffe/layer_factory.cpp
@@ -24,6 +24,8 @@ Layer<Dtype>* GetLayer(const LayerParameter& param) {
switch (type) {
case LayerParameter_LayerType_ACCURACY:
return new AccuracyLayer<Dtype>(param);
case LayerParameter_LayerType_ARGMAX:
return new ArgMaxLayer<Dtype>(param);
case LayerParameter_LayerType_BNLL:
return new BNLLLayer<Dtype>(param);
case LayerParameter_LayerType_CONCAT:
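As a sketch of how this factory dispatch would be exercised (assuming the standard protobuf-generated setters on LayerParameter; not code from this patch):

LayerParameter param;
param.set_type(LayerParameter_LayerType_ARGMAX);
// GetLayer returns a newly allocated ArgMaxLayer<float>* for this type; the caller owns it.
Layer<float>* layer = GetLayer<float>(param);
// ... layer->SetUp(bottom, &top); layer->Forward(bottom, &top); as in the tests below ...
delete layer;
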
57 changes: 57 additions & 0 deletions src/caffe/layers/argmax_layer.cpp
@@ -0,0 +1,57 @@
// Copyright 2014 BVLC and contributors.

#include <vector>
#include <cfloat>

#include "caffe/layer.hpp"
#include "caffe/vision_layers.hpp"


namespace caffe {

template <typename Dtype>
void ArgMaxLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
CHECK_EQ(bottom.size(), 1) << "ArgMaxLayer takes a single blob as input.";
CHECK_EQ(top->size(), 1) << "ArgMaxLayer produces a single blob as output.";
out_max_val_ = this->layer_param_.argmax_param().out_max_val();
if (out_max_val_) {
// Produces max_ind and max_val
(*top)[0]->Reshape(bottom[0]->num(), 2, 1, 1);
} else {
// Produces only max_ind
(*top)[0]->Reshape(bottom[0]->num(), 1, 1, 1);
}
}

template <typename Dtype>
Dtype ArgMaxLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top) {
const Dtype* bottom_data = bottom[0]->cpu_data();
Dtype* top_data = (*top)[0]->mutable_cpu_data();
int num = bottom[0]->num();
int dim = bottom[0]->count() / bottom[0]->num();
for (int i = 0; i < num; ++i) {
// Find the index of the maximum value for this image.
Dtype max_val = -FLT_MAX;
int max_ind = 0;
for (int j = 0; j < dim; ++j) {
if (bottom_data[i * dim + j] > max_val) {
max_val = bottom_data[i * dim + j];
max_ind = j;
}
}
if (out_max_val_) {
top_data[i * 2] = max_ind;
top_data[i * 2 + 1] = max_val;
} else {
top_data[i] = max_ind;
}
}
return Dtype(0);
}

INSTANTIATE_CLASS(ArgMaxLayer);


} // namespace caffe
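To make the output layout of Forward_cpu concrete, here is a standalone sketch (no Caffe dependencies; the score values are made up for illustration) that runs the same loop on a toy batch of two images with three scores each. With out_max_val enabled, the index/value pairs are interleaved per image, exactly as written above.

#include <cfloat>
#include <cstdio>

int main() {
  const int num = 2, dim = 3;
  // Two "images" with three scores each (arbitrary example values).
  float bottom[num * dim] = {0.1f, 0.7f, 0.2f,
                             0.5f, 0.3f, 0.9f};
  float top[num * 2];  // interleaved (max_ind, max_val), as with out_max_val = true
  for (int i = 0; i < num; ++i) {
    float max_val = -FLT_MAX;
    int max_ind = 0;
    for (int j = 0; j < dim; ++j) {
      if (bottom[i * dim + j] > max_val) {
        max_val = bottom[i * dim + j];
        max_ind = j;
      }
    }
    top[i * 2] = max_ind;  // the index is stored as a float, just as the layer does
    top[i * 2 + 1] = max_val;
  }
  // Prints: image 0 -> ind 1 val 0.7, then image 1 -> ind 2 val 0.9
  for (int i = 0; i < num; ++i) {
    std::printf("image %d -> ind %d val %g\n", i,
                static_cast<int>(top[i * 2]), top[i * 2 + 1]);
  }
  return 0;
}
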
14 changes: 12 additions & 2 deletions src/caffe/proto/caffe.proto
@@ -112,26 +112,29 @@ message SolverState {
repeated BlobProto history = 3; // The history for sgd solvers
}

// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available ID: 23 (last added: memory_data_param)
// LayerParameter next available ID: 24 (last added: argmax_param)
message LayerParameter {
repeated string bottom = 2; // the name of the bottom blobs
repeated string top = 3; // the name of the top blobs
optional string name = 4; // the layer name

// NOTE
// Add new LayerTypes to the enum below in lexicographical order (other than
// starting with NONE), starting with the next available ID in the comment
// line above the enum. Update the next available ID when you add a new
// LayerType.
//
// LayerType next available ID: 30 (last added: MEMORY_DATA)
// LayerType next available ID: 31 (last added: ARGMAX)
enum LayerType {
// "NONE" layer type is 0th enum element so that we don't cause confusion
// by defaulting to an existent LayerType (instead, should usually error if
// the type is unspecified).
NONE = 0;
ACCURACY = 1;
ARGMAX = 30;
BNLL = 2;
CONCAT = 3;
CONVOLUTION = 4;
@@ -172,6 +175,7 @@ message LayerParameter {
repeated float weight_decay = 8;

// Parameters for particular layer types.
optional ArgMaxParameter argmax_param = 23;
optional ConcatParameter concat_param = 9;
optional ConvolutionParameter convolution_param = 10;
optional DataParameter data_param = 11;
@@ -193,6 +197,12 @@
optional V0LayerParameter layer = 1;
}

// Message that stores parameters used by ArgMaxLayer
message ArgMaxParameter {
// If true, produce pairs (argmax, maxval)
optional bool out_max_val = 1 [default = false];
}

// Message that stores parameters used by ConcatLayer
message ConcatParameter {
// Concat Layer needs to specify the dimension along which the concat will happen,
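For reference, the new argmax_param field is reached through the usual protobuf-generated accessors, mirroring how the layer above and the tests below use it (the local variable names here are just for illustration):

LayerParameter layer_param;
// Write: request (max_ind, max_val) pairs instead of indices only.
layer_param.mutable_argmax_param()->set_out_max_val(true);
// Read: returns false when unset, per the [default = false] above.
bool pairs = layer_param.argmax_param().out_max_val();
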
113 changes: 113 additions & 0 deletions src/caffe/test/test_argmax_layer.cpp
@@ -0,0 +1,113 @@
// Copyright 2014 BVLC and contributors.

#include <vector>

#include "cuda_runtime.h"
#include "gtest/gtest.h"
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/vision_layers.hpp"
#include "caffe/test/test_gradient_check_util.hpp"

#include "caffe/test/test_caffe_main.hpp"

namespace caffe {

extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;

template <typename Dtype>
class ArgMaxLayerTest : public ::testing::Test {
protected:
ArgMaxLayerTest()
: seed_(1701),  // fixed seed so the Gaussian fill is reproducible; seed_ declared below
blob_bottom_(new Blob<Dtype>(20, 10, 1, 1)),
blob_top_(new Blob<Dtype>()) {
Caffe::set_random_seed(this->seed_);
// fill the values
FillerParameter filler_param;
GaussianFiller<Dtype> filler(filler_param);
filler.Fill(this->blob_bottom_);
blob_bottom_vec_.push_back(blob_bottom_);
blob_top_vec_.push_back(blob_top_);
}
virtual ~ArgMaxLayerTest() { delete blob_bottom_; delete blob_top_; }
const unsigned int seed_;
Blob<Dtype>* const blob_bottom_;
Blob<Dtype>* const blob_top_;
vector<Blob<Dtype>*> blob_bottom_vec_;
vector<Blob<Dtype>*> blob_top_vec_;
};

typedef ::testing::Types<float, double> Dtypes;
TYPED_TEST_CASE(ArgMaxLayerTest, Dtypes);


TYPED_TEST(ArgMaxLayerTest, TestSetup) {
LayerParameter layer_param;
ArgMaxLayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_->num());
EXPECT_EQ(this->blob_top_->channels(), 1);
}

TYPED_TEST(ArgMaxLayerTest, TestSetupMaxVal) {
LayerParameter layer_param;
ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param();
argmax_param->set_out_max_val(true);
ArgMaxLayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_->num());
EXPECT_EQ(this->blob_top_->channels(), 2);
}

TYPED_TEST(ArgMaxLayerTest, TestCPU) {
LayerParameter layer_param;
Caffe::set_mode(Caffe::CPU);
ArgMaxLayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
// Now, check values
const TypeParam* bottom_data = this->blob_bottom_->cpu_data();
const TypeParam* top_data = this->blob_top_->cpu_data();
int max_ind;
TypeParam max_val;
int num = this->blob_bottom_->num();
int dim = this->blob_bottom_->count() / num;
for (int i = 0; i < num; ++i) {
EXPECT_GE(top_data[i], 0);
EXPECT_LE(top_data[i], dim);
max_ind = top_data[i];
max_val = bottom_data[i * dim + max_ind];
for (int j = 0; j < dim; ++j) {
EXPECT_LE(bottom_data[i * dim + j], max_val);
}
}
}

TYPED_TEST(ArgMaxLayerTest, TestCPUMaxVal) {
LayerParameter layer_param;
Caffe::set_mode(Caffe::CPU);
ArgMaxParameter* argmax_param = layer_param.mutable_argmax_param();
argmax_param->set_out_max_val(true);
ArgMaxLayer<TypeParam> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
// Now, check values
const TypeParam* bottom_data = this->blob_bottom_->cpu_data();
const TypeParam* top_data = this->blob_top_->cpu_data();
int max_ind;
TypeParam max_val;
int num = this->blob_bottom_->num();
int dim = this->blob_bottom_->count() / num;
for (int i = 0; i < num; ++i) {
max_ind = top_data[i * 2];
max_val = top_data[i * 2 + 1];
EXPECT_GE(max_ind, 0);
EXPECT_LE(max_ind, dim);
EXPECT_EQ(bottom_data[i * dim + max_ind], max_val);
for (int j = 0; j < dim; ++j) {
EXPECT_LE(bottom_data[i * dim + j], max_val);
}
}
}

} // namespace caffe
