Commit
So there WAS this huge bug. Gone now
pjreddie committed May 9, 2014
1 parent 5ef74c2 commit cd8d53d
Showing 22 changed files with 752 additions and 176 deletions.
5 changes: 3 additions & 2 deletions Makefile
@@ -1,18 +1,19 @@
CC=gcc
GPU=1
GPU=0
COMMON=-Wall -Werror -Wfatal-errors `pkg-config --cflags opencv` -I/usr/local/cuda/include/
ifeq ($(GPU), 1)
COMMON+=-DGPU
else
endif
UNAME = $(shell uname)
OPTS=-O3 -flto
OPTS=-Ofast -flto
ifeq ($(UNAME), Darwin)
COMMON+= -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include
ifeq ($(GPU), 1)
LDFLAGS= -framework OpenCL
endif
else
OPTS+= -march=native
ifeq ($(GPU), 1)
LDFLAGS= -lOpenCL
endif
56 changes: 48 additions & 8 deletions src/activations.c
@@ -2,6 +2,7 @@

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *get_activation_string(ACTIVATION a)
@@ -40,27 +41,29 @@ float relu_activate(float x){return x*(x>0);}
float ramp_activate(float x){return x*(x>0)+.1*x;}
float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}

float activate(float x, ACTIVATION a){
float activate(float x, ACTIVATION a, float dropout)
{
if((float)rand()/RAND_MAX < dropout) return 0;
switch(a){
case LINEAR:
return linear_activate(x);
return linear_activate(x)/(1-dropout);
case SIGMOID:
return sigmoid_activate(x);
return sigmoid_activate(x)/(1-dropout);
case RELU:
return relu_activate(x);
return relu_activate(x)/(1-dropout);
case RAMP:
return ramp_activate(x);
return ramp_activate(x)/(1-dropout);
case TANH:
return tanh_activate(x);
return tanh_activate(x)/(1-dropout);
}
return 0;
}

void activate_array(float *x, const int n, const ACTIVATION a)
void activate_array(float *x, const int n, const ACTIVATION a, float dropout)
{
int i;
for(i = 0; i < n; ++i){
x[i] = activate(x[i], a);
x[i] = activate(x[i], a, dropout);
}
}

@@ -89,3 +92,40 @@ void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
}
}

#ifdef GPU

#include "opencl.h"
#include <math.h>

cl_kernel get_activation_kernel()
{
static int init = 0;
static cl_kernel kernel;
if(!init){
kernel = get_kernel("src/activations.cl", "activate_array", 0);
init = 1;
}
return kernel;
}


void activate_array_ongpu(cl_mem x, int n, ACTIVATION a, float dropout)
{
cl_setup();
cl_kernel kernel = get_activation_kernel();
cl_command_queue queue = cl.queue;

cl_uint i = 0;
cl.error = clSetKernelArg(kernel, i++, sizeof(x), (void*) &x);
cl.error = clSetKernelArg(kernel, i++, sizeof(n), (void*) &n);
cl.error = clSetKernelArg(kernel, i++, sizeof(a), (void*) &a);
cl.error = clSetKernelArg(kernel, i++, sizeof(dropout),
(void*) &dropout);
check_error(cl);

size_t gsize = n;

clEnqueueNDRangeKernel(queue, kernel, 1, 0, &gsize, 0, 0, 0, 0);
check_error(cl);
}
#endif
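
The heart of the fix is the new dropout handling in activate(): during training each activation is zeroed with probability dropout, and the survivors are scaled by 1/(1-dropout), so the expected value of the output matches the dropout-free case (inverted dropout). A minimal standalone sketch of that identity, independent of the darknet sources:

#include <stdio.h>
#include <stdlib.h>

/* Toy check of inverted dropout: drop with probability p, scale
 * survivors by 1/(1-p); the empirical mean stays close to x. */
int main(void)
{
    float x = 2.0f, p = 0.5f;
    double sum = 0;
    int i, n = 1000000;
    srand(0);
    for(i = 0; i < n; ++i){
        sum += ((float)rand()/RAND_MAX < p) ? 0 : x/(1-p);
    }
    printf("mean of dropped+scaled samples: %f (x = %f)\n", sum/n, x);
    return 0;
}

Because the scaling already happens at training time, inference needs no correction; the forward pass below simply forces dropout to 0 when not training.
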
28 changes: 28 additions & 0 deletions src/activations.cl
@@ -0,0 +1,28 @@
typedef enum{
SIGMOID, RELU, LINEAR, RAMP, TANH
}ACTIVATION;

float activate(float x, ACTIVATION a, float dropout)
{
//if((float)rand()/RAND_MAX < dropout) return 0;
switch(a){
case LINEAR:
return linear_activate(x)/(1-dropout);
case SIGMOID:
return sigmoid_activate(x)/(1-dropout);
case RELU:
return relu_activate(x)/(1-dropout);
case RAMP:
return ramp_activate(x)/(1-dropout);
case TANH:
return tanh_activate(x)/(1-dropout);
}
return 0;
}

__kernel void activate_array(__global float *x,
const int n, const ACTIVATION a, const float dropout)
{
int i = get_global_id(0);
x[i] = activate(x[i], a, dropout);
}
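
Note that the kernel above calls linear_activate, sigmoid_activate, relu_activate, ramp_activate and tanh_activate, which are not defined in the hunk shown; presumably they live elsewhere or are prepended when get_kernel assembles the program source. For reference only, OpenCL C helpers mirroring the CPU versions in src/activations.c could look like this (the linear and sigmoid bodies are assumptions based on the standard definitions, since the CPU hunk above does not show them):

// Sketch of the activation helpers assumed by the kernel; not part of this commit.
float linear_activate(float x){return x;}                        // assumed: identity
float sigmoid_activate(float x){return 1.f/(1.f + exp(-x));}     // assumed: logistic sigmoid
float relu_activate(float x){return x*(x>0);}                    // mirrors src/activations.c
float ramp_activate(float x){return x*(x>0)+.1f*x;}              // mirrors src/activations.c
float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}  // mirrors src/activations.c
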
8 changes: 6 additions & 2 deletions src/activations.h
@@ -1,3 +1,4 @@
#include "opencl.h"
#ifndef ACTIVATIONS_H
#define ACTIVATIONS_H

@@ -8,10 +9,13 @@ typedef enum{
ACTIVATION get_activation(char *s);

char *get_activation_string(ACTIVATION a);
float activate(float x, ACTIVATION a);
float activate(float x, ACTIVATION a, float dropout);
float gradient(float x, ACTIVATION a);
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta);
void activate_array(float *x, const int n, const ACTIVATION a);
void activate_array(float *x, const int n, const ACTIVATION a, float dropout);
#ifdef GPU
void activate_array_ongpu(cl_mem x, int n, ACTIVATION a, float dropout);
#endif

#endif

14 changes: 14 additions & 0 deletions src/axpy.c
@@ -0,0 +1,14 @@
#include "mini_blas.h"

void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
{
int i;
for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX];
}

void scal_cpu(int N, float ALPHA, float *X, int INCX)
{
int i;
for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA;
}

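The new axpy_cpu and scal_cpu follow the BLAS saxpy/sscal conventions: Y[i*INCY] += ALPHA*X[i*INCX] and X[i*INCX] *= ALPHA. A small usage sketch (link against src/axpy.c; mini_blas.h presumably declares the prototypes, so they are repeated here to keep the example self-contained):

#include <stdio.h>

/* Prototypes as added in this commit. */
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
void scal_cpu(int N, float ALPHA, float *X, int INCX);

int main(void)
{
    float x[3] = {1, 2, 3};
    float y[3] = {10, 20, 30};
    axpy_cpu(3, 2.0f, x, 1, y, 1);   /* y becomes {12, 24, 36} */
    scal_cpu(3, 0.5f, y, 1);         /* y becomes {6, 12, 18} */
    printf("%g %g %g\n", y[0], y[1], y[2]);
    return 0;
}
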
Empty file added src/axpy.cl
Empty file added src/col2im.c
Empty file added src/col2im.cl
32 changes: 14 additions & 18 deletions src/connected_layer.c
@@ -7,14 +7,15 @@
#include <stdlib.h>
#include <string.h>

connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation)
connected_layer *make_connected_layer(int batch, int inputs, int outputs, float dropout, ACTIVATION activation)
{
fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
int i;
connected_layer *layer = calloc(1, sizeof(connected_layer));
layer->inputs = inputs;
layer->outputs = outputs;
layer->batch=batch;
layer->dropout = dropout;

layer->output = calloc(batch*outputs, sizeof(float*));
layer->delta = calloc(batch*outputs, sizeof(float*));
@@ -54,9 +55,9 @@ void update_connected_layer(connected_layer layer, float step, float momentum, float decay)
memset(layer.weight_updates, 0, layer.outputs*layer.inputs*sizeof(float));
}

void forward_connected_layer(connected_layer layer, float *input)
void forward_connected_layer(connected_layer layer, float *input, int train)
{
int i;
if(!train) layer.dropout = 0;
memcpy(layer.output, layer.biases, layer.outputs*sizeof(float));
int m = layer.batch;
int k = layer.inputs;
@@ -65,17 +66,15 @@ float *b = layer.weights;
float *b = layer.weights;
float *c = layer.output;
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
for(i = 0; i < layer.outputs*layer.batch; ++i){
layer.output[i] = activate(layer.output[i], layer.activation);
}
activate_array(layer.output, layer.outputs*layer.batch, layer.activation, layer.dropout);
}

void learn_connected_layer(connected_layer layer, float *input)
void backward_connected_layer(connected_layer layer, float *input, float *delta)
{
int i;
for(i = 0; i < layer.outputs*layer.batch; ++i){
layer.delta[i] *= gradient(layer.output[i], layer.activation);
layer.bias_updates[i%layer.batch] += layer.delta[i]/layer.batch;
layer.bias_updates[i%layer.batch] += layer.delta[i];
}
int m = layer.inputs;
int k = layer.batch;
@@ -84,18 +83,15 @@ void learn_connected_layer(connected_layer layer, float *input)
float *b = layer.delta;
float *c = layer.weight_updates;
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}

void backward_connected_layer(connected_layer layer, float *input, float *delta)
{
int m = layer.inputs;
int k = layer.outputs;
int n = layer.batch;
m = layer.inputs;
k = layer.outputs;
n = layer.batch;

float *a = layer.weights;
float *b = layer.delta;
float *c = delta;
a = layer.weights;
b = layer.delta;
c = delta;

gemm(0,0,m,n,k,1,a,k,b,n,0,c,n);
if(c) gemm(0,0,m,n,k,1,a,k,b,n,0,c,n);
}

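With these changes the connected layer takes a dropout rate at construction and a train flag in the forward pass; when train is 0 the dropout is forced to 0, so inference is deterministic. A minimal usage sketch against the prototypes in src/connected_layer.h below (buffer sizes and hyperparameters are illustrative, and layer->delta is assumed to be filled by the loss or the next layer before the backward call):

#include "connected_layer.h"

void example(float *input /* batch*inputs floats */, float *prev_delta /* batch*inputs floats */)
{
    int batch = 4, inputs = 100, outputs = 10;
    connected_layer *layer = make_connected_layer(batch, inputs, outputs, .5, RELU);

    forward_connected_layer(*layer, input, 1);            /* training: dropout active */
    /* ... fill layer->delta from the loss or the layer above ... */
    backward_connected_layer(*layer, input, prev_delta);  /* accumulates weight/bias updates, writes prev_delta */
    update_connected_layer(*layer, .01, .9, .0005);       /* step, momentum, decay */

    forward_connected_layer(*layer, input, 0);            /* inference: dropout disabled */
}
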
7 changes: 4 additions & 3 deletions src/connected_layer.h
@@ -21,16 +21,17 @@ typedef struct{

float *output;
float *delta;

float dropout;

ACTIVATION activation;

} connected_layer;

connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation);
connected_layer *make_connected_layer(int batch, int inputs, int outputs, float dropout, ACTIVATION activation);

void forward_connected_layer(connected_layer layer, float *input);
void forward_connected_layer(connected_layer layer, float *input, int train);
void backward_connected_layer(connected_layer layer, float *input, float *delta);
void learn_connected_layer(connected_layer layer, float *input);
void update_connected_layer(connected_layer layer, float step, float momentum, float decay);


(Diffs for the remaining changed files are not shown.)
