Commit
So there WAS this huge bug. Gone now
pjreddie committed May 9, 2014
1 parent 5ef74c2 commit cd8d53d
Showing 22 changed files with 752 additions and 176 deletions.
5 changes: 3 additions & 2 deletions Makefile
@@ -1,18 +1,19 @@
CC=gcc
GPU=1
GPU=0
COMMON=-Wall -Werror -Wfatal-errors `pkg-config --cflags opencv` -I/usr/local/cuda/include/
ifeq ($(GPU), 1)
COMMON+=-DGPU
else
endif
UNAME = $(shell uname)
OPTS=-O3 -flto
OPTS=-Ofast -flto
ifeq ($(UNAME), Darwin)
COMMON+= -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include
ifeq ($(GPU), 1)
LDFLAGS= -framework OpenCL
endif
else
OPTS+= -march=native
ifeq ($(GPU), 1)
LDFLAGS= -lOpenCL
endif
56 changes: 48 additions & 8 deletions src/activations.c
@@ -2,6 +2,7 @@

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *get_activation_string(ACTIVATION a)
@@ -40,27 +41,29 @@ float relu_activate(float x){return x*(x>0);}
float ramp_activate(float x){return x*(x>0)+.1*x;}
float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}

float activate(float x, ACTIVATION a){
float activate(float x, ACTIVATION a, float dropout)
{
if((float)rand()/RAND_MAX < dropout) return 0;
switch(a){
case LINEAR:
return linear_activate(x);
return linear_activate(x)/(1-dropout);
case SIGMOID:
return sigmoid_activate(x);
return sigmoid_activate(x)/(1-dropout);
case RELU:
return relu_activate(x);
return relu_activate(x)/(1-dropout);
case RAMP:
return ramp_activate(x);
return ramp_activate(x)/(1-dropout);
case TANH:
return tanh_activate(x);
return tanh_activate(x)/(1-dropout);
}
return 0;
}

void activate_array(float *x, const int n, const ACTIVATION a)
void activate_array(float *x, const int n, const ACTIVATION a, float dropout)
{
int i;
for(i = 0; i < n; ++i){
x[i] = activate(x[i], a);
x[i] = activate(x[i], a, dropout);
}
}

@@ -89,3 +92,40 @@ void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
}
}

#ifdef GPU

#include "opencl.h"
#include <math.h>

cl_kernel get_activation_kernel()
{
static int init = 0;
static cl_kernel kernel;
if(!init){
kernel = get_kernel("src/activations.cl", "activate_array", 0);
init = 1;
}
return kernel;
}


void activate_array_ongpu(cl_mem x, int n, ACTIVATION a, float dropout)
{
cl_setup();
cl_kernel kernel = get_activation_kernel();
cl_command_queue queue = cl.queue;

cl_uint i = 0;
cl.error = clSetKernelArg(kernel, i++, sizeof(x), (void*) &x);
cl.error = clSetKernelArg(kernel, i++, sizeof(n), (void*) &n);
cl.error = clSetKernelArg(kernel, i++, sizeof(a), (void*) &a);
cl.error = clSetKernelArg(kernel, i++, sizeof(dropout),
(void*) &dropout);
check_error(cl);

size_t gsize = n;

clEnqueueNDRangeKernel(queue, kernel, 1, 0, &gsize, 0, 0, 0, 0);
check_error(cl);
}
#endif
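
The heart of the fix is the new dropout handling in activate(): during training each activation is zeroed with probability dropout, and the survivors are scaled by 1/(1-dropout), so the expected value of the output matches the dropout-free case (inverted dropout). A minimal standalone sketch of that identity, independent of the darknet sources:

#include <stdio.h>
#include <stdlib.h>

/* Toy check of inverted dropout: drop with probability p, scale
 * survivors by 1/(1-p); the empirical mean stays close to x. */
int main(void)
{
    float x = 2.0f, p = 0.5f;
    double sum = 0;
    int i, n = 1000000;
    srand(0);
    for(i = 0; i < n; ++i){
        sum += ((float)rand()/RAND_MAX < p) ? 0 : x/(1-p);
    }
    printf("mean of dropped+scaled samples: %f (x = %f)\n", sum/n, x);
    return 0;
}

Because the scaling already happens at training time, inference needs no correction; the forward pass below simply forces dropout to 0 when not training.
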
28 changes: 28 additions & 0 deletions src/activations.cl
@@ -0,0 +1,28 @@
typedef enum{
SIGMOID, RELU, LINEAR, RAMP, TANH
}ACTIVATION;

float activate(float x, ACTIVATION a, float dropout)
{
//if((float)rand()/RAND_MAX < dropout) return 0;
switch(a){
case LINEAR:
return linear_activate(x)/(1-dropout);
case SIGMOID:
return sigmoid_activate(x)/(1-dropout);
case RELU:
return relu_activate(x)/(1-dropout);
case RAMP:
return ramp_activate(x)/(1-dropout);
case TANH:
return tanh_activate(x)/(1-dropout);
}
return 0;
}

__kernel void activate_array(__global float *x,
const int n, const ACTIVATION a, const float dropout)
{
int i = get_global_id(0);
x[i] = activate(x[i], a, dropout);
}
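
Note that the kernel above calls linear_activate, sigmoid_activate, relu_activate, ramp_activate and tanh_activate, which are not defined in the hunk shown; presumably they live elsewhere or are prepended when get_kernel assembles the program source. For reference only, OpenCL C helpers mirroring the CPU versions in src/activations.c could look like this (the linear and sigmoid bodies are assumptions based on the standard definitions, since the CPU hunk above does not show them):

// Sketch of the activation helpers assumed by the kernel; not part of this commit.
float linear_activate(float x){return x;}                        // assumed: identity
float sigmoid_activate(float x){return 1.f/(1.f + exp(-x));}     // assumed: logistic sigmoid
float relu_activate(float x){return x*(x>0);}                    // mirrors src/activations.c
float ramp_activate(float x){return x*(x>0)+.1f*x;}              // mirrors src/activations.c
float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}  // mirrors src/activations.c
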
8 changes: 6 additions & 2 deletions src/activations.h
@@ -1,3 +1,4 @@
#include "opencl.h"
#ifndef ACTIVATIONS_H
#define ACTIVATIONS_H

@@ -8,10 +9,13 @@ typedef enum{
ACTIVATION get_activation(char *s);

char *get_activation_string(ACTIVATION a);
float activate(float x, ACTIVATION a);
float activate(float x, ACTIVATION a, float dropout);
float gradient(float x, ACTIVATION a);
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
void activate_array(float *x, const int n, const ACTIVATION a);
void activate_array(float *x, const int n, const ACTIVATION a, float dropout);
#ifdef GPU
void activate_array_ongpu(cl_mem x, int n, ACTIVATION a, float dropout);
#endif

#endif

14 changes: 14 additions & 0 deletions src/axpy.c
@@ -0,0 +1,14 @@
#include "mini_blas.h"

void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
{
int i;
for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX];
}

void scal_cpu(int N, float ALPHA, float *X, int INCX)
{
int i;
for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA;
}

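The new axpy_cpu and scal_cpu follow the BLAS saxpy/sscal conventions: Y[i*INCY] += ALPHA*X[i*INCX] and X[i*INCX] *= ALPHA. A small usage sketch (link against src/axpy.c; mini_blas.h presumably declares the prototypes, so they are repeated here to keep the example self-contained):

#include <stdio.h>

/* Prototypes as added in this commit. */
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
void scal_cpu(int N, float ALPHA, float *X, int INCX);

int main(void)
{
    float x[3] = {1, 2, 3};
    float y[3] = {10, 20, 30};
    axpy_cpu(3, 2.0f, x, 1, y, 1);   /* y becomes {12, 24, 36} */
    scal_cpu(3, 0.5f, y, 1);         /* y becomes {6, 12, 18} */
    printf("%g %g %g\n", y[0], y[1], y[2]);
    return 0;
}
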
Empty file added src/axpy.cl
Empty file added src/col2im.c
Empty file added src/col2im.cl
32 changes: 14 additions & 18 deletions src/connected_layer.c
@@ -7,14 +7,15 @@
#include <stdlib.h>
#include <string.h>

connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation)
connected_layer *make_connected_layer(int batch, int inputs, int outputs, float dropout, ACTIVATION activation)
{
fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
int i;
connected_layer *layer = calloc(1, sizeof(connected_layer));
layer->inputs = inputs;
layer->outputs = outputs;
layer->batch=batch;
layer->dropout = dropout;

layer->output = calloc(batch*outputs, sizeof(float*));
layer->delta = calloc(batch*outputs, sizeof(float*));
@@ -54,9 +55,9 @@ void update_connected_layer(connected_layer layer, float step, float momentum, float decay)
memset(layer.weight_updates, 0, layer.outputs*layer.inputs*sizeof(float));
}

void forward_connected_layer(connected_layer layer, float *input)
void forward_connected_layer(connected_layer layer, float *input, int train)
{
int i;
if(!train) layer.dropout = 0;
memcpy(layer.output, layer.biases, layer.outputs*sizeof(float));
int m = layer.batch;
int k = layer.inputs;
@@ -65,17 +66,15 @@ float *b = layer.weights;
float *b = layer.weights;
float *c = layer.output;
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
for(i = 0; i < layer.outputs*layer.batch; ++i){
layer.output[i] = activate(layer.output[i], layer.activation);
}
activate_array(layer.output, layer.outputs*layer.batch, layer.activation, layer.dropout);
}

void learn_connected_layer(connected_layer layer, float *input)
void backward_connected_layer(connected_layer layer, float *input, float *delta)
{
int i;
for(i = 0; i < layer.outputs*layer.batch; ++i){
layer.delta[i] *= gradient(layer.output[i], layer.activation);
layer.bias_updates[i%layer.batch] += layer.delta[i]/layer.batch;
layer.bias_updates[i%layer.batch] += layer.delta[i];
}
int m = layer.inputs;
int k = layer.batch;
@@ -84,18 +83,15 @@ void learn_connected_layer(connected_layer layer, float *input)
float *b = layer.delta;
float *c = layer.weight_updates;
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}

void backward_connected_layer(connected_layer layer, float *input, float *delta)
{
int m = layer.inputs;
int k = layer.outputs;
int n = layer.batch;
m = layer.inputs;
k = layer.outputs;
n = layer.batch;

float *a = layer.weights;
float *b = layer.delta;
float *c = delta;
a = layer.weights;
b = layer.delta;
c = delta;

gemm(0,0,m,n,k,1,a,k,b,n,0,c,n);
if(c) gemm(0,0,m,n,k,1,a,k,b,n,0,c,n);
}

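With these changes the connected layer takes a dropout rate at construction and a train flag in the forward pass; when train is 0 the dropout is forced to 0, so inference is deterministic. A minimal usage sketch against the prototypes in src/connected_layer.h below (buffer sizes and hyperparameters are illustrative, and layer->delta is assumed to be filled by the loss or the next layer before the backward call):

#include "connected_layer.h"

void example(float *input /* batch*inputs floats */, float *prev_delta /* batch*inputs floats */)
{
    int batch = 4, inputs = 100, outputs = 10;
    connected_layer *layer = make_connected_layer(batch, inputs, outputs, .5, RELU);

    forward_connected_layer(*layer, input, 1);            /* training: dropout active */
    /* ... fill layer->delta from the loss or the layer above ... */
    backward_connected_layer(*layer, input, prev_delta);  /* accumulates weight/bias updates, writes prev_delta */
    update_connected_layer(*layer, .01, .9, .0005);       /* step, momentum, decay */

    forward_connected_layer(*layer, input, 0);            /* inference: dropout disabled */
}
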
7 changes: 4 additions & 3 deletions src/connected_layer.h
@@ -21,16 +21,17 @@ typedef struct{

float *output;
float *delta;

float dropout;

ACTIVATION activation;

} connected_layer;

connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation);
connected_layer *make_connected_layer(int batch, int inputs, int outputs, float dropout, ACTIVATION activation);

void forward_connected_layer(connected_layer layer, float *input);
void forward_connected_layer(connected_layer layer, float *input, int train);
void backward_connected_layer(connected_layer layer, float *input, float *delta);
void learn_connected_layer(connected_layer layer, float *input);
void update_connected_layer(connected_layer layer, float step, float momentum, float decay);


(Diffs for the remaining changed files are not shown.)
