Tests refactor
pwlnk committed Mar 23, 2018
1 parent 519e3e6 commit 9e9d48a
Showing 16 changed files with 155 additions and 115 deletions.
cuda-neural-network-test/test/all_tests.cu (8 changes: 4 additions & 4 deletions)
@@ -1,11 +1,11 @@
#include <time.h>
#include "gtest/gtest.h"

#include "nn_utils_test.cu"
//#include "nn_utils_test.cu"
#include "linear_layer_test.cu"
#include "relu_activation_test.cu"
#include "sigmoid_activation_test.cu"
#include "neural_network_test.cu"
//#include "relu_activation_test.cu"
//#include "sigmoid_activation_test.cu"
//#include "neural_network_test.cu"

int main(int argc, char **argv)
{
cuda-neural-network-test/test/linear_layer_test.cu (143 changes: 67 additions & 76 deletions)
@@ -1,22 +1,32 @@
#include <iostream>
+#include <vector>

#include "gtest/gtest.h"
#include "test_utils.hh"
#include "linear_layer.hh"
#include "nn_utils.hh"

namespace {

class LinearLayerTest : public ::testing::Test {
protected:
LinearLayer linear_layer;
+nn_utils::Shape W_shape = nn_utils::Shape(2, 4);

nn_utils::Tensor3D A;
+nn_utils::Tensor3D dZ;

LinearLayerTest() :
linear_layer("some_linear_layer", nn_utils::Shape(10, 20))
{ }
A(nn_utils::Shape(3, 2)), dZ(nn_utils::Shape(3, 4)),
linear_layer("some_linear_layer", nn_utils::Shape(2, 4))
{
A.allocateCudaMemory();
dZ.allocateCudaMemory();
}

virtual void TearDown() {
cudaFree(A.data);
cudaFree(dZ.data);
}
};

@@ -36,8 +46,8 @@ namespace {
int y_dim = linear_layer.getYDim();

// then
-EXPECT_EQ(x_dim, 10);
-EXPECT_EQ(y_dim, 20);
+ASSERT_EQ(x_dim, W_shape.x);
+ASSERT_EQ(y_dim, W_shape.y);
}

TEST_F(LinearLayerTest, ShouldHaveInitializedBiasVectorWithZeros) {
@@ -46,10 +56,10 @@
const nn_utils::Tensor3D b = linear_layer.getBiasVector();

// then
-ASSERT_EQ(b.shape.x, linear_layer.getYDim());
+ASSERT_EQ(b.shape.x, W_shape.y);
ASSERT_EQ(b.shape.y, 1);
for (int x = 0; x < b.shape.x; x++) {
-ASSERT_EQ(b.data[x], 0);
+ASSERT_EQ(b[x], 0);
}
}

@@ -59,58 +69,51 @@
const nn_utils::Tensor3D W = linear_layer.getWeightsMatrix();

// then
-float prev_weight_val = -1.0;
+float prev_weight_value = -1.0;
for (int x = 0; x < W.shape.x; x++) {
for (int y = 0; y < W.shape.y; y++) {
-ASSERT_GE(W.data[y * W.shape.x + x], 0);
-ASSERT_LE(W.data[y * W.shape.x + x], 0.01);
-ASSERT_NE(W.data[y * W.shape.x + x], prev_weight_val);
-prev_weight_val = W.data[y * W.shape.x + x];
+ASSERT_GE(W[y * W.shape.x + x], 0);
+ASSERT_LE(W[y * W.shape.x + x], 0.01);
+ASSERT_NE(W[y * W.shape.x + x], prev_weight_value);
+prev_weight_value = W[y * W.shape.x + x];
}
}
}

-TEST_F(LinearLayerTest, ShouldPerformForwardProp) {
+TEST_F(LinearLayerTest, ShouldReturnOutputAfterForwardProp) {
// given
-float bias_val = 5;
+std::vector<float> b_cols_values = {1, 2, 3, 4};
+std::vector<float> W_rows_values = {2, 4, 6, 8};
+std::vector<float> A_cols_values = {3, 5, 7};

-A.shape.x = 10;
-A.shape.y = linear_layer.getXDim();;
-A.allocateCudaMemory();
-
-testutils::initializeTensorWithValue(linear_layer.W, 2);
-testutils::initializeTensorWithValue(linear_layer.b, bias_val);
-testutils::initializeTensorWithValue(A, 3);
+testutils::initializeEachTensorRowWithValue(linear_layer.W, W_rows_values);
+testutils::initializeEachTensorColWithValue(linear_layer.b, b_cols_values);
+testutils::initializeEachTensorColWithValue(A, A_cols_values);

// when
nn_utils::Tensor3D Z = linear_layer.forward(A);

// then
ASSERT_NE(Z.data, nullptr);
-ASSERT_EQ(Z.shape.x, 10);
-ASSERT_EQ(Z.shape.y, linear_layer.getYDim());
-for (int Z_x = 0; Z_x < Z.shape.x; Z_x++) {
-for (int Z_y = 0; Z_y < Z.shape.y; Z_y++) {
-ASSERT_EQ(Z.data[Z_y * Z.shape.x + Z_x], 2 * 3 * linear_layer.getXDim() + bias_val);
+ASSERT_EQ(Z.shape.x, A.shape.x);
+ASSERT_EQ(Z.shape.y, W_shape.y);
+
+for (int x = 0; x < Z.shape.x; x++) {
+for (int y = 0; y < Z.shape.y; y++) {
+float cell_value = W_rows_values[y] * A_cols_values[x] * W_shape.x + b_cols_values[y];
+ASSERT_EQ(Z[y * Z.shape.x + x], cell_value);
}
}
}

-TEST_F(LinearLayerTest, ShouldPerformBackprop) {
+// dA = dot(W^T, dZ)
+TEST_F(LinearLayerTest, ShouldReturnDerivativeAfterBackprop) {
// given
-float bias_val = 5;
+std::vector<float> W_cols_values = {6, 8};
+std::vector<float> dZ_cols_values = {3, 5, 7};

-A.shape.x = 10;
-A.shape.y = linear_layer.getXDim();;
-A.allocateCudaMemory();
-
-nn_utils::Tensor3D dZ(10, 20);
-dZ.allocateCudaMemory();
-testutils::initializeTensorWithValue(dZ, 2);
-
-testutils::initializeTensorWithValue(linear_layer.W, 2);
-testutils::initializeTensorWithValue(linear_layer.b, bias_val);
-testutils::initializeTensorWithValue(A, 3);
+testutils::initializeEachTensorColWithValue(linear_layer.W, W_cols_values);
+testutils::initializeEachTensorColWithValue(dZ, dZ_cols_values);

// when
nn_utils::Tensor3D Z = linear_layer.forward(A);
@@ -120,71 +123,59 @@
ASSERT_NE(dA.data, nullptr);
ASSERT_EQ(dA.shape.x, A.shape.x);
ASSERT_EQ(dA.shape.y, A.shape.y);
-for (int dA_x = 0; dA_x < dA.shape.x; dA_x++) {
-for (int dA_y = 0; dA_y < dA.shape.y; dA_y++) {
-ASSERT_EQ(dA.data[dA_y * dA.shape.x + dA_x], 80);
+
+for (int x = 0; x < dA.shape.x; x++) {
+for (int y = 0; y < dA.shape.y; y++) {
+float cell_value = W_cols_values[y] * dZ_cols_values[x] * W_shape.y;
+ASSERT_EQ(dA[y * dA.shape.x + x], cell_value);
}
}
}

TEST_F(LinearLayerTest, ShouldUptadeItsBiasDuringBackprop) {
// given
-float bias_val = 5;
+std::vector<float> b_cols_values = {1, 2, 3, 4};
+std::vector<float> dZ_rows_values = {3, 5, 7, 9};
float learning_rate = 0.1;
-float updated_bias_val = bias_val - learning_rate * ((2 * 10) / 10);

-A.shape.x = 10;
-A.shape.y = linear_layer.getXDim();;
-A.allocateCudaMemory();
-
-nn_utils::Tensor3D dZ(10, 20);
-dZ.allocateCudaMemory();
-testutils::initializeTensorWithValue(dZ, 2);
-
-testutils::initializeTensorWithValue(linear_layer.W, 2);
-testutils::initializeTensorWithValue(linear_layer.b, bias_val);
-testutils::initializeTensorWithValue(A, 3);
+testutils::initializeEachTensorColWithValue(linear_layer.b, b_cols_values);
+testutils::initializeEachTensorRowWithValue(dZ, dZ_rows_values);

// when
nn_utils::Tensor3D Z = linear_layer.forward(A);
nn_utils::Tensor3D dA = linear_layer.backprop(dZ, learning_rate);
-nn_utils::Tensor3D b = linear_layer.b;

// then
-ASSERT_NE(b.data, nullptr);
-for (int b_x = 0; b_x < b.shape.x; b_x++) {
-ASSERT_NEAR(b.data[b_x], updated_bias_val, 0.0001);
+ASSERT_NE(linear_layer.b.data, nullptr);
+
+for (int x = 0; x < linear_layer.b.shape.x; x++) {
+float bias_after_gdc = b_cols_values[x] - learning_rate * dZ_rows_values[x];
+ASSERT_NEAR(linear_layer.b[x], bias_after_gdc, 0.0001);
}
}

TEST_F(LinearLayerTest, ShouldUptadeItsWeightsDuringBackprop) {
// given
-float bias_val = 5;
+std::vector<float> W_cols_values = {2, 4};
+std::vector<float> dZ_rows_values = {3, 5, 7, 9};
+std::vector<float> A_rows_values = {2, 4};
float learning_rate = 0.1;
-float updated_weights_val = 2 - learning_rate * ((2 * 3 * 10) / 10);

-A.shape.x = 10;
-A.shape.y = linear_layer.getXDim();;
-A.allocateCudaMemory();
-
-nn_utils::Tensor3D dZ(10, 20);
-dZ.allocateCudaMemory();
-testutils::initializeTensorWithValue(dZ, 2);
-
-testutils::initializeTensorWithValue(linear_layer.W, 2);
-testutils::initializeTensorWithValue(linear_layer.b, bias_val);
-testutils::initializeTensorWithValue(A, 3);
+testutils::initializeEachTensorColWithValue(linear_layer.W, W_cols_values);
+testutils::initializeEachTensorRowWithValue(dZ, dZ_rows_values);
+testutils::initializeEachTensorRowWithValue(A, A_rows_values);

// when
nn_utils::Tensor3D Z = linear_layer.forward(A);
nn_utils::Tensor3D dA = linear_layer.backprop(dZ, learning_rate);
-nn_utils::Tensor3D W = linear_layer.W;

// then
-ASSERT_NE(W.data, nullptr);
-for (int W_x = 0; W_x < W.shape.x; W_x++) {
-for (int W_y = 0; W_y < W.shape.y; W_y++) {
-ASSERT_NEAR(W.data[W_y * W.shape.x + W_x], updated_weights_val, 0.0001);
+ASSERT_NE(linear_layer.W.data, nullptr);
+
+for (int x = 0; x < W_shape.x; x++) {
+for (int y = 0; y < W_shape.y; y++) {
+float weight_after_gdc = W_cols_values[x] - learning_rate * dZ_rows_values[y] * A_rows_values[x];
+ASSERT_NEAR(linear_layer.W[y * W_shape.x + x], weight_after_gdc, 0.0001);
}
}
}
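A note on the expected values used above (not part of the commit): they follow directly from the forward pass Z = W*A + b. With every entry of row y of W set to W_rows_values[y], every entry of column x of A set to A_cols_values[x], and bias entry y set to b_cols_values[y], each output cell is a dot product of W_shape.x identical terms, so Z[y][x] = W_rows_values[y] * A_cols_values[x] * W_shape.x + b_cols_values[y]; the backprop and update tests build their expected values the same way. A minimal host-side sketch of that arithmetic, using the same y * shape.x + x layout as Tensor3D (illustrative only, not code from this repository):

#include <cassert>
#include <vector>

// Expected value of one output cell when W rows, A columns and b entries are constant.
float expectedForwardCell(const std::vector<float>& W_rows,  // one value per W row
                          const std::vector<float>& A_cols,  // one value per A column
                          const std::vector<float>& b_cols,  // one value per bias entry
                          int shared_dim,                    // W_shape.x, the summed dimension
                          int x, int y) {
    // a dot product of shared_dim identical products, plus the bias of row y
    return W_rows[y] * A_cols[x] * shared_dim + b_cols[y];
}

int main() {
    std::vector<float> W_rows = {2, 4, 6, 8}, A_cols = {3, 5, 7}, b_cols = {1, 2, 3, 4};
    assert(expectedForwardCell(W_rows, A_cols, b_cols, 2, 0, 1) == 4 * 3 * 2 + 2);  // 26
    return 0;
}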
cuda-neural-network-test/test/test_utils.cu (21 changes: 21 additions & 0 deletions)
@@ -1,6 +1,7 @@
#include <iostream>
#include <time.h>
#include <math.h>
+#include <assert.h>

#include "test_utils.hh"

@@ -23,6 +24,26 @@ namespace testutils {
}
}

+void initializeEachTensorRowWithValue(nn_utils::Tensor3D M, std::vector<float> values) {
+assert(M.shape.y == values.size());
+
+for (int x = 0; x < M.shape.x; x++) {
+for (int y = 0; y < M.shape.y; y++) {
+M[y * M.shape.x + x] = values[y];
+}
+}
+}
+
+void initializeEachTensorColWithValue(nn_utils::Tensor3D M, std::vector<float> values) {
+assert(M.shape.x == values.size());
+
+for (int x = 0; x < M.shape.x; x++) {
+for (int y = 0; y < M.shape.y; y++) {
+M[y * M.shape.x + x] = values[x];
+}
+}
+}
+
float sigmoid(float x) {
return exp(x) / (1 + exp(x));
}
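A quick note on the indexing convention behind these helpers (not part of the commit): x indexes columns and y indexes rows, and element (x, y) sits at data[y * shape.x + x], so initializeEachTensorRowWithValue takes one value per row (hence the shape.y assert) and initializeEachTensorColWithValue one value per column (hence the shape.x assert). A tiny standalone illustration of the same fill pattern on a plain array:

#include <cstdio>

int main() {
    const int shape_x = 2, shape_y = 3;          // 2 columns, 3 rows
    float M[shape_y * shape_x];
    float row_values[] = {10, 20, 30};           // one value per row

    // same loop body as initializeEachTensorRowWithValue
    for (int x = 0; x < shape_x; x++)
        for (int y = 0; y < shape_y; y++)
            M[y * shape_x + x] = row_values[y];

    for (int y = 0; y < shape_y; y++)
        printf("%g %g\n", M[y * shape_x + 0], M[y * shape_x + 1]);  // 10 10 / 20 20 / 30 30
    return 0;
}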
cuda-neural-network-test/test/test_utils.hh (3 changes: 3 additions & 0 deletions)
@@ -1,11 +1,14 @@
#pragma once

#include "nn_utils.hh"
+#include <vector>

namespace testutils {

void initializeTensorWithValue(nn_utils::Tensor3D M, float value);
void initializeTensorRandomlyInRange(nn_utils::Tensor3D M, float min, float max);
+void initializeEachTensorRowWithValue(nn_utils::Tensor3D M, std::vector<float> values);
+void initializeEachTensorColWithValue(nn_utils::Tensor3D M, std::vector<float> values);

float sigmoid(float x);

cuda-neural-network/src/linear_layer.cu (2 changes: 1 addition & 1 deletion)
@@ -99,7 +99,7 @@ __global__ void weightsGDC(float* A, float* dZ, float* W,
}

LinearLayer::LinearLayer(std::string name, nn_utils::Shape W_shape) :
-W(W_shape), Z(), b(W_shape.y, 1), dA()
+W(W_shape), b(W_shape.y, 1)
{
this->name = name;
b.allocateCudaMemory();
cuda-neural-network/src/linear_layer.hh (8 changes: 4 additions & 4 deletions)
@@ -4,9 +4,9 @@

// for unit testing purposes only
namespace {
-class LinearLayerTest_ShouldPerformForwardProp_Test;
+class LinearLayerTest_ShouldReturnOutputAfterForwardProp_Test;
class NeuralNetworkTest_ShouldPerformForwardProp_Test;
-class LinearLayerTest_ShouldPerformBackprop_Test;
+class LinearLayerTest_ShouldReturnDerivativeAfterBackprop_Test;
class LinearLayerTest_ShouldUptadeItsBiasDuringBackprop_Test;
class LinearLayerTest_ShouldUptadeItsWeightsDuringBackprop_Test;
}
@@ -26,9 +26,9 @@ private:
void initializeWeightsRandomly();

// for unit testing purposes only
-friend class LinearLayerTest_ShouldPerformForwardProp_Test;
+friend class LinearLayerTest_ShouldReturnOutputAfterForwardProp_Test;
friend class NeuralNetworkTest_ShouldPerformForwardProp_Test;
-friend class LinearLayerTest_ShouldPerformBackprop_Test;
+friend class LinearLayerTest_ShouldReturnDerivativeAfterBackprop_Test;
friend class LinearLayerTest_ShouldUptadeItsBiasDuringBackprop_Test;
friend class LinearLayerTest_ShouldUptadeItsWeightsDuringBackprop_Test;
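Background on why these friend declarations track the test names (a note, not part of the commit): Google Test's TEST_F(Fixture, Name) macro generates a class called Fixture_Name_Test deriving from the fixture, so renaming a test renames that class, and every friend declaration that grants it access to LinearLayer's private members has to be renamed with it. In outline:

// TEST_F(LinearLayerTest, ShouldReturnOutputAfterForwardProp) { ... } expands, roughly, to:
class LinearLayerTest_ShouldReturnOutputAfterForwardProp_Test : public LinearLayerTest {
    virtual void TestBody();  // the test's body ends up here
};
// which is exactly the class name linear_layer.hh must befriend.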

cuda-neural-network/src/main.cu (52 changes: 26 additions & 26 deletions)
@@ -11,32 +11,32 @@ int main() {

srand( time(NULL) );

-// nn_utils::Tensor3D X(1, 100);
-// X.allocateCudaMemory();
-// for (int i = 0; i < X.shape.y; i++) {
-// X.data[i] = -80;
-// }
-//
-// NeuralNetwork nn;
-// nn.addLayer(new LinearLayer("linear_1", nn_utils::Shape(100, 40)));
-// nn.addLayer(new ReLUActivation("relu_1"));
-// nn.addLayer(new LinearLayer("linear_2", nn_utils::Shape(40, 20)));
-// nn.addLayer(new ReLUActivation("relu_2"));
-// nn.addLayer(new LinearLayer("linear_3", nn_utils::Shape(20, 1)));
-// nn.addLayer(new SigmoidActivation("sigmoid_output"));
-//
-// nn_utils::Tensor3D Y;
-//
-// try {
-// Y = nn.forward(X);
-// }
-// catch (NNException &e) {
-// std::cout << e.what() << std::endl;
-// }
-//
-// std::cout << "Neural network output is: " << Y.data[0] << ", shape is: " << Y.shape.x << ", " << Y.shape.y << std::endl;
-
-std::cout << cudaGetErrorString(cudaGetLastError()) << std::endl;
+nn_utils::Tensor3D X(1, 100);
+X.allocateCudaMemory();
+for (int i = 0; i < X.shape.y; i++) {
+X.data[i] = -80;
+}
+
+NeuralNetwork nn;
+nn.addLayer(new LinearLayer("linear_1", nn_utils::Shape(100, 40)));
+nn.addLayer(new ReLUActivation("relu_1"));
+nn.addLayer(new LinearLayer("linear_2", nn_utils::Shape(40, 20)));
+nn.addLayer(new ReLUActivation("relu_2"));
+nn.addLayer(new LinearLayer("linear_3", nn_utils::Shape(20, 1)));
+nn.addLayer(new SigmoidActivation("sigmoid_output"));
+
+nn_utils::Tensor3D Y;
+nn_utils::Tensor3D target;
+target.allocateCudaMemory();
+target[0] = 0;
+
+for (int i = 0; i < 100; i++) {
+Y = nn.forward(X);
+std::cout << "Prediction: " << Y.data[0]
+<< ", Target: " << target.data[0]
+<< ", Cost: " << nn_utils::binaryCrossEntropyCost(Y, target) << std::endl;
+nn.backprop(Y, target);
+}

return 0;
}
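For context on the new main (a note, not part of the commit): the loop repeatedly runs the network forward on the constant input X, compares the sigmoid output against a single target of 0, and backpropagates, so the printed cost should fall over the 100 iterations as the prediction approaches the target. nn_utils::binaryCrossEntropyCost presumably computes the standard binary cross-entropy, which for a single prediction/target pair is sketched below:

#include <cmath>

// Standard binary cross-entropy for one prediction p in (0, 1) and target t in {0, 1};
// the project's cost function presumably averages this over the entries of Y.
float binaryCrossEntropy(float p, float t) {
    return -(t * std::log(p) + (1.0f - t) * std::log(1.0f - p));
}
// e.g. p = 0.5, t = 0 gives -log(0.5), about 0.693; a prediction near 0 drives the cost toward 0.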