diff --git a/include/caffe/layers/cudnn_conv_layer.hpp b/include/caffe/layers/cudnn_conv_layer.hpp index 1a0b5aac..b0ddd08a 100644 --- a/include/caffe/layers/cudnn_conv_layer.hpp +++ b/include/caffe/layers/cudnn_conv_layer.hpp @@ -30,7 +30,8 @@ template class CuDNNConvolutionLayer : public ConvolutionLayer { public: explicit CuDNNConvolutionLayer(const LayerParameter& param) - : ConvolutionLayer(param), handles_setup_(false) {} + : ConvolutionLayer(param), handles_setup_(false), + weight_initialized_{false} {}// Binary added: weight_initialized_ virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); virtual void Reshape(const vector*>& bottom, @@ -66,8 +67,9 @@ class CuDNNConvolutionLayer : public ConvolutionLayer { void **workspace; // aliases into workspaceData // Binary net added + bool weight_initialized_; std::unique_ptr> weight_binary_; - void normalizeWeights(const bool truncateOriginalWeights); + void normalizeWeights(); // Binary net end }; #endif diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp index 7bedb12b..ba196595 100644 --- a/src/caffe/layers/cudnn_conv_layer.cpp +++ b/src/caffe/layers/cudnn_conv_layer.cpp @@ -229,19 +229,6 @@ void CuDNNConvolutionLayer::Reshape( cudnn::setTensor4dDesc(&bias_desc_, 1, this->num_output_ / this->group_, 1, 1); } - - // Binary added - if (this->layer_param_.convolution_param().binary()) - { - CHECK_GE(this->blobs_.size(), 1); - CHECK_GT(this->blobs_[0]->shape().size(), 2u); - weight_binary_.reset(new Blob()); - weight_binary_->Reshape(this->blobs_[0]->shape()); - // Data to weightReal - const bool truncateOriginalWeights = true; - normalizeWeights(true); - } - // Binary added end } template @@ -278,7 +265,7 @@ CuDNNConvolutionLayer::~CuDNNConvolutionLayer() { // Binary added template -void CuDNNConvolutionLayer::normalizeWeights(const bool truncateOriginalWeights) +void CuDNNConvolutionLayer::normalizeWeights() { // Data to weightReal auto* weightBinaryData = weight_binary_->mutable_cpu_data(); @@ -293,7 +280,8 @@ void CuDNNConvolutionLayer::normalizeWeights(const bool truncateOriginalW for (auto channel = 0 ; channel < weight_binary_->shape()[1] ; channel++) { const auto offset = offsetNum + channel * imageArea; - // XNOR-style + + // // XNOR-style // // L1 norm // auto l1Norm = Dtype(0); // for (auto i = 0 ; i < imageArea ; i++) @@ -307,17 +295,14 @@ void CuDNNConvolutionLayer::normalizeWeights(const bool truncateOriginalW // const auto sum = l1Norm / imageArea; // for (auto i = 0 ; i < imageArea ; i++) // weightBinaryData[offset+i] = (weightRealData[offset+i] < 0 ? -sum : sum); + // Old binary net style // truncate to +-1 for (auto i = 0 ; i < imageArea ; i++) weightRealData[offset+i] = std::max(-Dtype(1), std::min(Dtype(1), weightRealData[offset+i])); // Binary approximation - if (truncateOriginalWeights) - for (auto i = 0 ; i < imageArea ; i++) - weightRealData[offset+i] = (weightRealData[offset+i] < 0 ? -Dtype(1) : Dtype(1)); - else - for (auto i = 0 ; i < imageArea ; i++) - weightBinaryData[offset+i] = (weightRealData[offset+i] < 0 ? -Dtype(1) : Dtype(1)); + for (auto i = 0 ; i < imageArea ; i++) + weightBinaryData[offset+i] = (weightRealData[offset+i] < 0 ? -Dtype(1) : Dtype(1)); } } } diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index 495189d9..60647535 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -11,13 +11,31 @@ template void CuDNNConvolutionLayer::Forward_gpu( const vector*>& bottom, const vector*>& top) { // Binary added - if (this->layer_param_.convolution_param().binary() && this->phase_ == TRAIN) - normalizeWeights(false); + if (this->layer_param_.convolution_param().binary()) + { + // TRAIN + if (this->phase_ == TRAIN) + normalizeWeights(); + // TEST + only first time + else if (!weight_initialized_) + { + weight_initialized_ = true; + CHECK_GE(this->blobs_.size(), 1); + CHECK_GT(this->blobs_[0]->shape().size(), 2u); + weight_binary_.reset(new Blob()); + weight_binary_->Reshape(this->blobs_[0]->shape()); + // Data to weightReal + normalizeWeights(); + } + } // Binary added end // const Dtype* weight = this->blobs_[0]->gpu_data(); // Binary commented // Binary added - const Dtype* weight = (this->layer_param_.convolution_param().binary() && this->phase_ == TRAIN + // const Dtype* weight = weight_binary_->gpu_data(); + // const Dtype* weight = (this->layer_param_.convolution_param().binary() && this->phase_ == TRAIN + // ? weight_binary_->gpu_data() : this->blobs_[0]->gpu_data()); + const Dtype* weight = (this->layer_param_.convolution_param().binary() ? weight_binary_->gpu_data() : this->blobs_[0]->gpu_data()); // Binary added ended for (int i = 0; i < bottom.size(); ++i) {