Skip to content

Commit

Permalink
Merge pull request BVLC#136 from kloudkl/cuda_timing
Browse files Browse the repository at this point in the history
Add Timer class unifying CPU and GPU timer and use it in net_speed_benchmark
  • Loading branch information
shelhamer committed Mar 13, 2014
2 parents 2690b10 + cc509a9 commit 04d6595
Show file tree
Hide file tree
Showing 4 changed files with 314 additions and 27 deletions.
39 changes: 39 additions & 0 deletions include/caffe/util/benchmark.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright 2014 kloud@github

#ifndef CAFFE_UTIL_BENCHMARK_H_
#define CAFFE_UTIL_BENCHMARK_H_

#include <boost/date_time/posix_time/posix_time.hpp>
#include <cuda_runtime.h>

namespace caffe {

class Timer {
public:
Timer();
virtual ~Timer();
void Start();
void Stop();
float MilliSeconds();
float Seconds();

inline bool initted() { return initted_; }
inline bool running() { return running_; }
inline bool has_run_at_least_once() { return has_run_at_least_once_; }

protected:
void Init();

bool initted_;
bool running_;
bool has_run_at_least_once_;
cudaEvent_t start_gpu_;
cudaEvent_t stop_gpu_;
boost::posix_time::ptime start_cpu_;
boost::posix_time::ptime stop_cpu_;
float elapsed_milliseconds_;
};

} // namespace caffe

#endif // CAFFE_UTIL_BENCHMARK_H_
169 changes: 169 additions & 0 deletions src/caffe/test/test_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
// Copyright 2014 kloud@github

#include <unistd.h> // for usleep
#include <cuda_runtime.h>
#include <gtest/gtest.h>

#include "caffe/common.hpp"
#include "caffe/util/benchmark.hpp"
#include "caffe/test/test_caffe_main.hpp"

namespace caffe {

extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;

class BenchmarkTest : public ::testing::Test {};

TEST_F(BenchmarkTest, TestTimerConstructorCPU) {
Caffe::set_mode(Caffe::CPU);
Timer timer;
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_FALSE(timer.has_run_at_least_once());
}

TEST_F(BenchmarkTest, TestTimerConstructorGPU) {
Caffe::set_mode(Caffe::GPU);
Timer timer;
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_FALSE(timer.has_run_at_least_once());
}

TEST_F(BenchmarkTest, TestTimerStartCPU) {
Caffe::set_mode(Caffe::CPU);
Timer timer;
timer.Start();
EXPECT_TRUE(timer.initted());
EXPECT_TRUE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
timer.Start();
EXPECT_TRUE(timer.initted());
EXPECT_TRUE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
timer.Stop();
timer.Start();
EXPECT_TRUE(timer.initted());
EXPECT_TRUE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
}

TEST_F(BenchmarkTest, TestTimerStartGPU) {
Caffe::set_mode(Caffe::GPU);
Timer timer;
timer.Start();
EXPECT_TRUE(timer.initted());
EXPECT_TRUE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
timer.Stop();
timer.Start();
EXPECT_TRUE(timer.initted());
EXPECT_TRUE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
timer.Start();
EXPECT_TRUE(timer.initted());
EXPECT_TRUE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
}

TEST_F(BenchmarkTest, TestTimerStopCPU) {
Caffe::set_mode(Caffe::CPU);
Timer timer;
timer.Stop();
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_FALSE(timer.has_run_at_least_once());
timer.Start();
timer.Stop();
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
timer.Stop();
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
}

TEST_F(BenchmarkTest, TestTimerStopGPU) {
Caffe::set_mode(Caffe::GPU);
Timer timer;
timer.Stop();
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_FALSE(timer.has_run_at_least_once());
timer.Start();
timer.Stop();
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
timer.Stop();
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
}

TEST_F(BenchmarkTest, TestTimerMilliSecondsCPU) {
Caffe::set_mode(Caffe::CPU);
Timer timer;
CHECK_EQ(timer.MilliSeconds(), 0);
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_FALSE(timer.has_run_at_least_once());
timer.Start();
usleep(300 * 1000);
CHECK_GE(timer.MilliSeconds(), 299);
CHECK_LE(timer.MilliSeconds(), 301);
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
}

TEST_F(BenchmarkTest, TestTimerMilliSecondsGPU) {
Caffe::set_mode(Caffe::GPU);
Timer timer;
CHECK_EQ(timer.MilliSeconds(), 0);
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_FALSE(timer.has_run_at_least_once());
timer.Start();
usleep(300 * 1000);
CHECK_GE(timer.MilliSeconds(), 299);
CHECK_LE(timer.MilliSeconds(), 301);
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
}

TEST_F(BenchmarkTest, TestTimerSecondsCPU) {
Caffe::set_mode(Caffe::CPU);
Timer timer;
CHECK_EQ(timer.Seconds(), 0);
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_FALSE(timer.has_run_at_least_once());
timer.Start();
usleep(300 * 1000);
CHECK_GE(timer.Seconds(), 0.299);
CHECK_LE(timer.Seconds(), 0.301);
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
}

TEST_F(BenchmarkTest, TestTimerSecondsGPU) {
Caffe::set_mode(Caffe::GPU);
Timer timer;
CHECK_EQ(timer.Seconds(), 0);
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_FALSE(timer.has_run_at_least_once());
timer.Start();
usleep(300 * 1000);
CHECK_GE(timer.Seconds(), 0.299);
CHECK_LE(timer.Seconds(), 0.301);
EXPECT_TRUE(timer.initted());
EXPECT_FALSE(timer.running());
EXPECT_TRUE(timer.has_run_at_least_once());
}

} // namespace caffe
80 changes: 80 additions & 0 deletions src/caffe/util/benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright 2014 kloud@github

#include <boost/date_time/posix_time/posix_time.hpp>
#include <cuda_runtime.h>

#include "caffe/common.hpp"
#include "caffe/util/benchmark.hpp"

namespace caffe {

Timer::Timer()
: initted_(false),
running_(false),
has_run_at_least_once_(false) {
Init();
}

Timer::~Timer() {
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaEventDestroy(start_gpu_));
CUDA_CHECK(cudaEventDestroy(stop_gpu_));
}
}

void Timer::Start() {
if (!running()) {
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaEventRecord(start_gpu_, 0));
} else {
start_cpu_ = boost::posix_time::microsec_clock::local_time();
}
running_ = true;
has_run_at_least_once_ = true;
}
}

void Timer::Stop() {
if (running()) {
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaEventRecord(stop_gpu_, 0));
CUDA_CHECK(cudaEventSynchronize(stop_gpu_));
} else {
stop_cpu_ = boost::posix_time::microsec_clock::local_time();
}
running_ = false;
}
}

float Timer::MilliSeconds() {
if (!has_run_at_least_once()) {
LOG(WARNING) << "Timer has never been run before reading time.";
return 0;
}
if (running()) {
Stop();
}
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_,
stop_gpu_));
} else {
elapsed_milliseconds_ = (stop_cpu_ - start_cpu_).total_milliseconds();
}
return elapsed_milliseconds_;
}

float Timer::Seconds() {
return MilliSeconds() / 1000.;
}

void Timer::Init() {
if (!initted()) {
if (Caffe::mode() == Caffe::GPU) {
CUDA_CHECK(cudaEventCreate(&start_gpu_));
CUDA_CHECK(cudaEventCreate(&stop_gpu_));
}
initted_ = true;
}
}

} // namespace caffe
53 changes: 26 additions & 27 deletions tools/net_speed_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,31 +1,30 @@
// Copyright 2013 Yangqing Jia

#include <cuda_runtime.h>
#include <fcntl.h>
#include <google/protobuf/text_format.h>

#include <cstring>
#include <ctime>
#include <string>
#include <vector>

#include "cuda_runtime.h"
#include "fcntl.h"
#include "google/protobuf/text_format.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/net.hpp"
#include "caffe/filler.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/benchmark.hpp"
#include "caffe/util/io.hpp"
#include "caffe/solver.hpp"

using boost::shared_ptr;

using namespace caffe; // NOLINT(build/namespaces)

int main(int argc, char** argv) {
int total_iter = 50;

if (argc < 2) {
LOG(ERROR) << "net_speed_benchmark net_proto [iterations=50] [CPU/GPU] "
<< "[Device_id=0]";
LOG(ERROR) << "net_speed_benchmark net_proto [iterations=50]"
" [CPU/GPU] [Device_id=0]";
return 0;
}

Expand Down Expand Up @@ -67,37 +66,37 @@ int main(int argc, char** argv) {
vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
LOG(ERROR) << "*** Benchmark begins ***";
clock_t forward_start = clock();
Timer total_timer;
total_timer.Start();
Timer forward_timer;
forward_timer.Start();
Timer timer;
for (int i = 0; i < layers.size(); ++i) {
const string& layername = layers[i]->layer_param().name();
clock_t start = clock();
timer.Start();
for (int j = 0; j < total_iter; ++j) {
layers[i]->Forward(bottom_vecs[i], &top_vecs[i]);
}
LOG(ERROR) << layername << "\tforward: "
<< static_cast<float>(clock() - start) / CLOCKS_PER_SEC
<< " seconds.";
LOG(ERROR) << layername << "\tforward: " << timer.MilliSeconds() <<
" milli seconds.";
}
LOG(ERROR) << "Forward pass: "
<< static_cast<float>(clock() - forward_start) / CLOCKS_PER_SEC
<< " seconds.";
clock_t backward_start = clock();
LOG(ERROR) << "Forward pass: " << forward_timer.MilliSeconds() <<
" milli seconds.";
Timer backward_timer;
backward_timer.Start();
for (int i = layers.size() - 1; i >= 0; --i) {
const string& layername = layers[i]->layer_param().name();
clock_t start = clock();
timer.Start();
for (int j = 0; j < total_iter; ++j) {
layers[i]->Backward(top_vecs[i], true, &bottom_vecs[i]);
}
LOG(ERROR) << layername << "\tbackward: "
<< static_cast<float>(clock() - start) / CLOCKS_PER_SEC
<< " seconds.";
<< timer.MilliSeconds() << " milli seconds.";
}
LOG(ERROR) << "Backward pass: "
<< static_cast<float>(clock() - backward_start) / CLOCKS_PER_SEC
<< " seconds.";
LOG(ERROR) << "Total Time: "
<< static_cast<float>(clock() - forward_start) / CLOCKS_PER_SEC
<< " seconds.";
LOG(ERROR) << "Backward pass: " << backward_timer.MilliSeconds() <<
" milli seconds.";
LOG(ERROR) << "Total Time: " << total_timer.MilliSeconds() <<
" milli seconds.";
LOG(ERROR) << "*** Benchmark ends ***";
return 0;
}

0 comments on commit 04d6595

Please sign in to comment.